In [1]:
! python --version
Python 3.11.9
In [86]:
# Core numerics / dataframes
import numpy as np
import pandas as pd

# Plotting stack
import matplotlib.pyplot as plt
from matplotlib import cm, colors
from matplotlib.lines import Line2D
from matplotlib.collections import PatchCollection
from matplotlib import patches

import seaborn as sns

import scipy.sparse

# Geospatial / geometry helpers
import geopandas as gpd

# Single-cell / spatial-omics stack
import scanpy as sc

import anndata as an
import scanorama

from pathlib import Path

import squidpy as sq

import os
import warnings

from shapely.geometry import Polygon, MultiPolygon, Point
from shapely.affinity import affine_transform

import math

# Record package versions so the analysis session is reproducible.
sc.logging.print_versions()
-----
anndata     0.10.7
scanpy      1.10.0
-----
PIL                         10.3.0
annoy                       NA
anyio                       NA
asciitree                   NA
asttokens                   NA
attr                        23.1.0
attrs                       23.1.0
babel                       2.11.0
brotli                      1.0.9
certifi                     2024.02.02
cffi                        1.16.0
charset_normalizer          2.0.4
cloudpickle                 3.0.0
colorama                    0.4.6
comm                        0.2.1
cycler                      0.12.1
cython_runtime              NA
dask                        2024.5.0
dask_expr                   1.1.0
dask_image                  2023.08.1
datashader                  0.16.1
datatree                    0.0.14
dateutil                    2.8.2
debugpy                     1.6.7
decorator                   5.1.1
defusedxml                  0.7.1
docrep                      0.3.2
executing                   0.8.3
fastjsonschema              NA
fbpca                       NA
fsspec                      2023.6.0
geopandas                   0.14.4
h5py                        3.11.0
idna                        3.7
igraph                      0.11.5
imageio                     2.34.1
intervaltree                NA
ipykernel                   6.28.0
ipywidgets                  8.1.2
jedi                        0.18.1
jinja2                      3.1.3
joblib                      1.4.2
json5                       NA
jsonschema                  4.19.2
jsonschema_specifications   NA
jupyter_events              0.8.0
jupyter_server              2.10.0
jupyterlab_server           2.25.1
kiwisolver                  1.4.5
lazy_loader                 0.4
legacy_api_wrap             NA
leidenalg                   0.10.2
llvmlite                    0.42.0
markupsafe                  2.1.3
matplotlib                  3.8.2
matplotlib_inline           0.1.6
matplotlib_scalebar         0.8.1
mpl_toolkits                NA
msgpack                     1.0.8
multipledispatch            0.6.0
multiscale_spatial_image    0.11.2
natsort                     8.4.0
nbformat                    5.9.2
networkx                    3.3
numba                       0.59.1
numcodecs                   0.12.1
numpy                       1.26.3
ome_zarr                    NA
overrides                   NA
packaging                   23.2
pandas                      2.2.0
param                       2.1.0
parso                       0.8.3
patsy                       0.5.6
pkg_resources               NA
platformdirs                3.10.0
prometheus_client           NA
prompt_toolkit              3.0.43
psutil                      5.9.0
pure_eval                   0.2.2
pyarrow                     16.0.0
pycparser                   2.21
pyct                        0.5.0
pydev_ipython               NA
pydevconsole                NA
pydevd                      2.9.5
pydevd_file_utils           NA
pydevd_plugins              NA
pydevd_tracing              NA
pygeos                      0.14
pygments                    2.15.1
pyparsing                   3.1.2
pyproj                      3.6.1
pythoncom                   NA
pythonjsonlogger            NA
pytz                        2024.1
pywintypes                  NA
referencing                 NA
requests                    2.31.0
rfc3339_validator           0.1.4
rfc3986_validator           0.1.1
rich                        NA
rpds                        NA
scanorama                   1.7.4
scipy                       1.13.0
seaborn                     0.13.1
send2trash                  NA
session_info                1.0.0
setuptools                  69.5.1
shapely                     2.0.4
six                         1.16.0
skimage                     0.23.2
sklearn                     1.4.2
sniffio                     1.3.0
socks                       1.7.1
sortedcontainers            2.4.0
spatial_image               0.3.0
spatialdata                 0.0.15
squidpy                     1.4.1
stack_data                  0.2.0
statsmodels                 0.14.2
tblib                       3.0.0
texttable                   1.7.0
threadpoolctl               3.5.0
tifffile                    2024.5.10
tlz                         0.12.1
toolz                       0.12.1
tornado                     6.3.3
tqdm                        4.66.4
traitlets                   5.7.1
typing_extensions           NA
urllib3                     1.26.18
validators                  0.28.1
wcwidth                     0.2.5
websocket                   0.58.0
win32api                    NA
win32com                    NA
win32con                    NA
win32trace                  NA
winerror                    NA
xarray                      2023.12.0
xarray_dataclasses          1.7.0
xarray_schema               0.0.3
xrspatial                   0.4.0
yaml                        6.0.1
zarr                        2.18.0
zipp                        NA
zmq                         25.1.2
-----
IPython             8.20.0
jupyter_client      8.6.0
jupyter_core        5.5.0
jupyterlab          4.0.11
notebook            7.0.8
-----
Python 3.11.9 | packaged by Anaconda, Inc. | (main, Apr 19 2024, 16:40:41) [MSC v.1916 64 bit (AMD64)]
Windows-10-10.0.22631-SP0
-----
Session information updated at 2024-12-15 19:41
In [2]:
# NOTE(review): this silences *all* warnings for the whole session, which can hide
# real problems (e.g. pandas SettingWithCopyWarning) — consider filtering by category.
warnings.simplefilter("ignore")
In [4]:
# Colour palette for the Leiden clusters (one colour per cluster in the plots below).
custom_palette = [ "#a70e62", "#e10e3c", "#f47140", "#f0b932", "#ffd95a", "#c2ed51", "#8ed73d", "#489c73", "#1a8d83", 
                  "#4915be", "#2357d5", "#fed2ff", "#6ecad5", "#BD08F9", '#886fc2', "#e170c0", "#FF33CC", "#e4a9e8"]

# Read the list of genes, 200 spike genes subset 
genes_to_include_file =  "subset_spikegenes_list.txt"

# One gene ID per line; strip whitespace/newlines from each entry.
with open(genes_to_include_file) as gene_list_handle:
    genes_to_include = [gene_id.strip() for gene_id in gene_list_handle]

VGN1e1 = late double ridge, VRT-A2a (P1 WT)

VGN1e9 = late double ridge, VRT-A2b (P1 Pol)

VGN1b6 = lemma primordia, VRT-A2a (P1 WT)

VGN1b8 = lemma primordia, VRT-A2b (P1 Pol)

VGN1a6 = terminal spikelet, VRT-A2a (P1 WT)

VGN1a4 = terminal spikelet, VRT-A2b (P1 Pol)

VGN1c2 = carpel extension round, VRT-A2a (P1 WT)

VGN1c3 = carpel extension round, VRT-A2b (P1 Pol)

First I am going to read in my filtered & normalised adata objects for each sample¶

In [5]:
# Define the filenames of the saved AnnData objects
# (per-sample, filtered + normalised in the QC notebook).
adata_filenames = {
    "adata_VGN1a6": "qc/adata_VGN1a6_clean.h5ad",
    "adata_VGN1a4": "qc/adata_VGN1a4_clean.h5ad",
    "adata_VGN1b6": "qc/adata_VGN1b6_clean.h5ad",
    "adata_VGN1b8": "qc/adata_VGN1b8_clean.h5ad",
    "adata_VGN1e1": "qc/adata_VGN1e1_clean.h5ad",
    "adata_VGN1e9": "qc/adata_VGN1e9_clean.h5ad",
    "adata_VGN1c2": "qc/adata_VGN1c2_clean.h5ad",
    "adata_VGN1c3": "qc/adata_VGN1c3_clean.h5ad"
}

# Initialize a dictionary to store the loaded AnnData objects
loaded_adata_objects = {}

# Load each file and store it in the dictionary
for name, filename in adata_filenames.items():
    loaded_adata_objects[name] = sc.read_h5ad(filename)
    # Fix: report the actual source path (previously printed a literal placeholder).
    print(f"Loaded {name} from {filename}")

# Access the loaded AnnData objects as plain variables for the cells below
adata_VGN1a6 = loaded_adata_objects["adata_VGN1a6"]
adata_VGN1a4 = loaded_adata_objects["adata_VGN1a4"]

adata_VGN1b6 = loaded_adata_objects["adata_VGN1b6"]
adata_VGN1b8 = loaded_adata_objects["adata_VGN1b8"]


adata_VGN1e1 = loaded_adata_objects["adata_VGN1e1"]
adata_VGN1e9 = loaded_adata_objects["adata_VGN1e9"]


adata_VGN1c2 = loaded_adata_objects["adata_VGN1c2"]
adata_VGN1c3 = loaded_adata_objects["adata_VGN1c3"]
Loaded adata_VGN1a6 from qc/adata_VGN1a6_clean.h5ad
Loaded adata_VGN1a4 from qc/adata_VGN1a4_clean.h5ad
Loaded adata_VGN1b6 from qc/adata_VGN1b6_clean.h5ad
Loaded adata_VGN1b8 from qc/adata_VGN1b8_clean.h5ad
Loaded adata_VGN1e1 from qc/adata_VGN1e1_clean.h5ad
Loaded adata_VGN1e9 from qc/adata_VGN1e9_clean.h5ad
Loaded adata_VGN1c2 from qc/adata_VGN1c2_clean.h5ad
Loaded adata_VGN1c3 from qc/adata_VGN1c3_clean.h5ad

Now I will look at clustering my data by integrating all the samples together with scanorama and performing leiden clustering¶

I am following this documentation: https://scanpy-tutorials.readthedocs.io/en/latest/spatial/integration-scanorama.html#Data-integration

In [7]:
# Label every cell with its sample of origin before integration.
sample_adatas = {
    'VGN1a6': adata_VGN1a6,
    'VGN1a4': adata_VGN1a4,
    'VGN1b6': adata_VGN1b6,
    'VGN1b8': adata_VGN1b8,
    'VGN1c2': adata_VGN1c2,
    'VGN1c3': adata_VGN1c3,
    'VGN1e1': adata_VGN1e1,
    'VGN1e9': adata_VGN1e9,
}
for sample_name, sample_adata in sample_adatas.items():
    sample_adata.obs['dataset'] = sample_name

# Samples and their unique labels, in matching order, for concatenation.
adatas = list(sample_adatas.values())
keys = list(sample_adatas.keys())

# Scanorama returns new AnnData objects whose .X holds the batch-corrected,
# scaled cell-by-gene matrix (plus the low-dimensional X_scanorama embedding),
# so expression is comparable across samples.
adatas_cor = scanorama.correct_scanpy(adatas, return_dimred=True)

# Concatenate the corrected per-sample objects into one AnnData.
adata_spatial = sc.concat(
    adatas_cor,
    label="dataset",  # Use the 'dataset' column as label
    uns_merge="unique",
    keys=keys,
    index_unique="-",
)

# Neighbour graph on the Scanorama embedding, then UMAP + Leiden clustering.
sc.pp.neighbors(adata_spatial, use_rep="X_scanorama")
sc.tl.umap(adata_spatial)

sc.tl.leiden(adata_spatial, resolution = 1, key_added="clusters", directed=False)
Found 200 genes among all datasets
[[0.         0.74560592 0.62879834 0.53725676 0.55078631 0.41609621
  0.36939792 0.36073932]
 [0.         0.         0.59564917 0.51637399 0.6898982  0.57102584
  0.19963785 0.18546845]
 [0.         0.         0.         0.65685809 0.4371547  0.2406768
  0.45359891 0.30592734]
 [0.         0.         0.         0.         0.4717608  0.37399146
  0.34314169 0.3486297 ]
 [0.         0.         0.         0.         0.         0.83118531
  0.33001358 0.27597196]
 [0.         0.         0.         0.         0.         0.
  0.51923947 0.46462715]
 [0.         0.         0.         0.         0.         0.
  0.         0.84384959]
 [0.         0.         0.         0.         0.         0.
  0.         0.        ]]
Processing datasets (6, 7)
Processing datasets (4, 5)
Processing datasets (0, 1)
Processing datasets (1, 4)
Processing datasets (2, 3)
Processing datasets (0, 2)
Processing datasets (1, 2)
Processing datasets (1, 5)
Processing datasets (0, 4)
Processing datasets (0, 3)
Processing datasets (5, 6)
Processing datasets (1, 3)
Processing datasets (3, 4)
Processing datasets (5, 7)
Processing datasets (2, 6)
Processing datasets (2, 4)
Processing datasets (0, 5)
Processing datasets (3, 5)
Processing datasets (0, 6)
Processing datasets (0, 7)
Processing datasets (3, 7)
Processing datasets (3, 6)
Processing datasets (4, 6)
Processing datasets (2, 7)
Processing datasets (4, 7)
Processing datasets (2, 5)
Processing datasets (1, 6)
Processing datasets (1, 7)
In [14]:
# Visualising the clusters we made by making the UMAP and spatial plots before plotting out individually 

# Define the keys in the required order
keys = ['VGN1e1', 'VGN1e9', 'VGN1b6', 'VGN1b8', 'VGN1a6', 'VGN1a4', 'VGN1c2', 'VGN1c3']

# Use the default (white-background) matplotlib style
plt.style.use('default')

# Create a figure with subplots
fig, axes = plt.subplots(3, 3, figsize=(20, 10))  # 3 rows, 3 columns: UMAP + 8 spatial maps

# Plot UMAP for "clusters"
sc.pl.umap(
    adata_spatial, color="clusters", palette=custom_palette, ax=axes[0, 0], show=False, legend_loc="on data", legend_fontsize=12)

axes[0, 0].set_title("UMAP Clusters")

# Define a dictionary to keep track of colors already added to the legend
# NOTE(review): legend_colors is never used below — candidate for removal.
legend_colors = {}

# Plot spatial plots for each sample; coordinates are flipped/rotated per
# sample so all sections share a comparable orientation on the page.
for i, sample in enumerate(keys):
    row, col = divmod(i + 1, 3)  # Adjust index to ensure all spatial plots are displayed
    ax = axes[row, col]
    if sample in ["VGN1a6", "VGN1b6", "VGN1e1", "VGN1e9", "VGN1b8", "VGN1a4"]:
        adata_sample = adata_spatial[adata_spatial.obs['dataset'] == sample].copy()
        # Swap x and y coordinates
        adata_sample.obsm["spatial"][:, [0, 1]] = adata_sample.obsm["spatial"][:, [1, 0]]
        if sample == "VGN1a6":
            # Rotate 90 degrees counterclockwise
            adata_sample.obsm["spatial"][:, 0] = adata_sample.obsm["spatial"][:, 0] * -1
        elif sample == "VGN1b6":
            # Rotate 90 degrees clockwise
            adata_sample.obsm["spatial"][:, 1] = adata_sample.obsm["spatial"][:, 1] * -1
        elif sample == "VGN1b8":
            # Rotate 90 degrees
            adata_sample.obsm["spatial"][:, 1] = adata_sample.obsm["spatial"][:, 1] * -1
        elif sample == "VGN1a4":
            # Rotate 90 degrees
            adata_sample.obsm["spatial"][:, 0] = adata_sample.obsm["spatial"][:, 0] * -1
        elif sample == "VGN1e1":
            # NOTE(review): this re-slice discards the x/y swap applied above,
            # so e1/e9 are only rotated 180 degrees, not swapped — TODO confirm intended.
            adata_sample = adata_spatial[adata_spatial.obs['dataset'] == sample].copy()
            # Rotate 180 degrees separately for each axis
            adata_sample.obsm["spatial"][:, 0] *= -1
            adata_sample.obsm["spatial"][:, 1] *= -1
        elif sample == "VGN1e9":
            # NOTE(review): same re-slice as VGN1e1 — the earlier swap is undone here.
            adata_sample = adata_spatial[adata_spatial.obs['dataset'] == sample].copy()
            # Rotate 180 degrees separately for each axis
            adata_sample.obsm["spatial"][:, 0] *= -1
            adata_sample.obsm["spatial"][:, 1] *= -1
        sc.pl.spatial(
            adata_sample,  # Filter data for the current sample
            color="clusters",
            spot_size=12,  # Provide a spot size value here
            title=f"Spatial Map for {sample}",  # Set a title for each plot
            ax=ax, show=False,
        )
    else:
        # Samples that need no reorientation are plotted directly from the slice.
        sc.pl.spatial(
            adata_spatial[adata_spatial.obs['dataset'] == sample],  # Filter data for the current sample
            color="clusters",
            spot_size=12,  # Provide a spot size value here
            title=f"Spatial Map for {sample}",  # Set a title for each plot
            ax=ax, show=False,
        )

# Adjust layout
plt.tight_layout()

# Remove legends from subplots (a single shared legend is added below)
for ax in axes.flat:
    ax.legend().remove()

plt.subplots_adjust(hspace=0.15, wspace=0.15)


# Create a single legend for the entire figure without color indicators
handles, labels = fig.axes[7].get_legend_handles_labels()  # Get legend handles and labels from any subplot
# Keep only the first 18 labels (one per cluster colour)
fig.legend(handles[:18], labels[:18], loc='upper right', bbox_to_anchor=(0.99, 0.99), borderaxespad=0., ncol=2, fontsize='large', markerscale=2)
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
Out[14]:
<matplotlib.legend.Legend at 0x165a66c59d0>
No description has been provided for this image
In [7]:
# Standalone UMAP of the Leiden clusters (white background), saved as SVG
# for the manuscript figure.
sc.pl.umap(
    adata_spatial, color="clusters", palette=custom_palette, show=False, legend_loc="on data", legend_fontsize=12, legend_fontoutline= 1 , size=5)

plt.savefig("leidenclustering/UMAP_VRT_A2a_A2b_200genes_res1_white.svg", dpi=300, bbox_inches='tight', format='svg', transparent=True)
No description has been provided for this image

Saving cluster information into hdf5 file¶

In [15]:
# Persist the integrated, clustered object twice.
# Version 1 keeps the full "<cellID>-<sample>" index and is used for downstream analysis.
file_path = "leidenclustering/VRT_A2a_A2b_leiden_200genes_res1_indexed.h5ad"
adata_spatial.write_h5ad(file_path)

# Version 2: the VizGen visualiser expects bare cell IDs, so strip the
# "-<sample>" suffix (everything after the first '-') from the index.
adata_indexing = adata_spatial.copy()
adata_indexing.obs.index = adata_indexing.obs.index.map(lambda cell_id: cell_id.split('-')[0])

# Save the re-indexed AnnData object as an HDF5 file for the visualiser.
file_path = "leidenclustering/VRT_A2a_A2b_leiden_200genes_res1.hdf5"
adata_indexing.write_h5ad(file_path)

Creating a table with each cell and their domain assignment¶

Supplemental Table 9: Cell ID, domain assignment, sample id
In [8]:
# Supplemental Table 9: one row per cell with its cluster ("domain")
# assignment, bare cell ID, and sample of origin.
cluster_assignment_df = adata_spatial.obs['clusters'].copy().reset_index()
id_parts = cluster_assignment_df['index'].str.split('-', expand=True)  # "<cellID>-<sample>"
cluster_assignment_df[['cell_id', 'sample']] = id_parts
cluster_assignment_df = cluster_assignment_df.drop(columns=['index'])
cluster_assignment_df.to_csv("leidenclustering/DomainAssignment_PerCell.csv", index=False)
cluster_assignment_df
Out[8]:
clusters cell_id sample
0 0 2305551000002100247 VGN1a6
1 3 2305551000002100249 VGN1a6
2 9 2305551000002100250 VGN1a6
3 3 2305551000002100262 VGN1a6
4 6 2305551000002100268 VGN1a6
... ... ... ...
50726 4 2655507000138100169 VGN1e9
50727 4 2655507000138100170 VGN1e9
50728 0 2655507000138100176 VGN1e9
50729 4 2655507000138100177 VGN1e9
50730 0 2655507000138100178 VGN1e9

50731 rows × 3 columns

Calculating total cells per cluster and percentage of total cells per cluster¶

In [9]:
# Per-(cluster, sample) cell counts and percentages for the WT developmental
# time course (Fig. 2f).
cluster_info = adata_spatial.obs['clusters'].copy()
df = cluster_info.reset_index()
df[['cell_id', 'sample']] = df['index'].str.split('-', expand=True)
df.drop(columns=['index'], inplace=True)
cluster_sample_counts = df.groupby(['clusters', 'sample']).size().reset_index(name='cell_count')

# WT samples ordered by developmental stage (W2.5 -> W5)
sample_order = ['VGN1e1', 'VGN1b6', 'VGN1a6', 'VGN1c2']

# Filter the DataFrame to include only the specified samples.
# Fix: .copy() so the categorical assignment below writes to a real frame, not a
# view of cluster_sample_counts (SettingWithCopyWarning, previously hidden by
# the session-wide warnings filter).
cluster_sample_counts_filtered = cluster_sample_counts[cluster_sample_counts['sample'].isin(sample_order)].copy()

# Set the 'sample' column as a categorical type with the specified order
cluster_sample_counts_filtered['sample'] = pd.Categorical(cluster_sample_counts_filtered['sample'], categories=sample_order, ordered=True)

# Sort the DataFrame by the 'sample' column based on the categorical order
cluster_sample_counts_ordered = cluster_sample_counts_filtered.sort_values('sample')

# Calculate the total number of cells per sample
total_cells_per_sample = cluster_sample_counts_ordered.groupby('sample')['cell_count'].sum().reset_index(name='total_cells')

# Merge the total cells with the original filtered DataFrame
cluster_sample_counts_w_total = pd.merge(cluster_sample_counts_ordered, total_cells_per_sample, on='sample')

# Calculate the percentage of cells per cluster in each sample
cluster_sample_counts_w_total['percentage'] = (cluster_sample_counts_w_total['cell_count'] / cluster_sample_counts_w_total['total_cells']) * 100


cluster_sample_counts_w_total.to_csv("leidenclustering/percentages_Fig2f.csv", index=False)
cluster_sample_counts_w_total
Out[9]:
clusters sample cell_count total_cells percentage
0 8 VGN1e1 6 2209 0.271616
1 16 VGN1e1 0 2209 0.000000
2 15 VGN1e1 1 2209 0.045269
3 14 VGN1e1 0 2209 0.000000
4 13 VGN1e1 116 2209 5.251245
... ... ... ... ... ...
67 10 VGN1c2 871 15185 5.735924
68 3 VGN1c2 349 15185 2.298321
69 13 VGN1c2 399 15185 2.627593
70 9 VGN1c2 598 15185 3.938097
71 11 VGN1c2 362 15185 2.383932

72 rows × 5 columns

In [10]:
plt.style.use('default')

# Define the custom cluster order (manually arranged so related clusters sit together)
cluster_order = ['17', '16', '14', '15', '10', '7', '8', '13', '11', '9', '5', '6', '2', '12', '1', '4', '0', '3']

# Pivot the DataFrame so that clusters are the index, samples are the columns, and percentage of cells are the values
pivot_df_percentage = cluster_sample_counts_w_total.pivot_table(index='clusters', columns='sample', values='percentage', fill_value=0)

# Reorder the rows according to the custom cluster order
pivot_df_percentage = pivot_df_percentage.reindex(cluster_order)

# Sample-to-stage mapping (Waddington scale stages)
sample_to_stage = {
    'VGN1e1': 'W2.5',
    'VGN1b6': 'W3.25',
    'VGN1a6': 'W4',
    'VGN1c2': 'W5'
}

# Map the columns (sample IDs) to their corresponding stages
pivot_df_percentage.rename(columns=sample_to_stage, inplace=True)

# Set font sizes for the entire plot
# NOTE(review): plt.rc mutates the global rcParams, so these sizes persist for
# every later plotting cell in this session.
plt.rc('font', size=14)          # Global font size
plt.rc('axes', titlesize=20)     # Title font size
plt.rc('axes', labelsize=20)     # Axis labels font size
plt.rc('xtick', labelsize=20)    # X-axis tick labels font size
plt.rc('ytick', labelsize=20)    # Y-axis tick labels font size
plt.rc('legend', fontsize=25)    # Legend font size

# Create the horizontal stacked bar plot (one colour per developmental stage)
ax = pivot_df_percentage.plot(kind='barh', stacked=True, figsize=(10, 8), color=['#410052', '#34618D', '#7CD250', '#FDE725'], edgecolor='none')

# Set labels and title
ax.set_xlabel('Percentage of Cells', fontsize=16)
ax.set_ylabel('Cluster Number', fontsize=16)
ax.set_title('Percentage of Cells per Cluster by Sample', fontsize=18)

# Customize the spines (borders)
ax.spines['top'].set_color('white')   # Set top border to white (invisible)
ax.spines['right'].set_color('white') # Set right border to white (invisible)
ax.spines['bottom'].set_color('black') # Set x-axis spine (bottom) to black
ax.spines['left'].set_color('black')   # Set y-axis spine (left) to black
ax.spines['bottom'].set_linewidth(1.5) # Optionally, increase the thickness of x-axis
ax.spines['left'].set_linewidth(1.5)   # Optionally, increase the thickness of y-axis

# Add legend outside the plot
plt.legend(title='Developmental Stage', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.savefig("leidenclustering/Percentage_Cells_PerCluster_bystage.svg", dpi=300, bbox_inches='tight', format='svg', transparent=True)
plt.show()
No description has been provided for this image

Gene enrichment analysis¶

Logistic Regression Description: This method models the probability of a gene being expressed in one cluster versus the others. A higher score means that the gene is more likely to be associated with the cluster in question. Conversely, lower or negative scores suggest that the gene is less expressed in that cluster relative to others.

See publication: A discriminative learning approach to differential expression analysis for single-cell RNA-seq

In [10]:
# Rank marker genes per cluster using one-vs-rest logistic regression;
# this method stores names + scores only (no p-values / log-fold-changes).
sc.tl.rank_genes_groups(adata_spatial, groupby='clusters', method='logreg')
In [11]:
# Inspect the keys in the rank_genes_groups result
# ('logreg' yields only 'params', 'names' and 'scores' — see the printed dict_keys)
result = adata_spatial.uns['rank_genes_groups']
print(result.keys())
dict_keys(['params', 'names', 'scores'])

I want these values exported as a dataframe containing Gene, LogReg Score, and normalised counts (by sample)¶

(Supplementary Table 10)

In [12]:
#loading my adata objects again (duplicate of the cell above — kept so this
# section can be run independently after a kernel restart)
adata_filenames = {
    "adata_VGN1a6": "qc/adata_VGN1a6_clean.h5ad",
    "adata_VGN1a4": "qc/adata_VGN1a4_clean.h5ad",
    "adata_VGN1b6": "qc/adata_VGN1b6_clean.h5ad",
    "adata_VGN1b8": "qc/adata_VGN1b8_clean.h5ad",
    "adata_VGN1e1": "qc/adata_VGN1e1_clean.h5ad",
    "adata_VGN1e9": "qc/adata_VGN1e9_clean.h5ad",
    "adata_VGN1c2": "qc/adata_VGN1c2_clean.h5ad",
    "adata_VGN1c3": "qc/adata_VGN1c3_clean.h5ad"
}

# Initialize a dictionary to store the loaded AnnData objects
loaded_adata_objects = {}

# Load each file and store it in the dictionary
for name, filename in adata_filenames.items():
    loaded_adata_objects[name] = sc.read_h5ad(filename)
    # Fix: report the actual source path (previously printed a literal placeholder).
    print(f"Loaded {name} from {filename}")

# Access the loaded AnnData objects as plain variables
adata_VGN1a6 = loaded_adata_objects["adata_VGN1a6"]
adata_VGN1a4 = loaded_adata_objects["adata_VGN1a4"]

adata_VGN1b6 = loaded_adata_objects["adata_VGN1b6"]
adata_VGN1b8 = loaded_adata_objects["adata_VGN1b8"]


adata_VGN1e1 = loaded_adata_objects["adata_VGN1e1"]
adata_VGN1e9 = loaded_adata_objects["adata_VGN1e9"]


adata_VGN1c2 = loaded_adata_objects["adata_VGN1c2"]
adata_VGN1c3 = loaded_adata_objects["adata_VGN1c3"]
Loaded adata_VGN1a6 from qc/adata_VGN1a6_clean.h5ad
Loaded adata_VGN1a4 from qc/adata_VGN1a4_clean.h5ad
Loaded adata_VGN1b6 from qc/adata_VGN1b6_clean.h5ad
Loaded adata_VGN1b8 from qc/adata_VGN1b8_clean.h5ad
Loaded adata_VGN1e1 from qc/adata_VGN1e1_clean.h5ad
Loaded adata_VGN1e9 from qc/adata_VGN1e9_clean.h5ad
Loaded adata_VGN1c2 from qc/adata_VGN1c2_clean.h5ad
Loaded adata_VGN1c3 from qc/adata_VGN1c3_clean.h5ad
In [13]:
# Pair each per-sample AnnData object with its sample label for batch processing.
adatas = [
    (adata_VGN1a6, "VGN1a6"),
    (adata_VGN1a4, "VGN1a4"),
    (adata_VGN1b6, "VGN1b6"),
    (adata_VGN1b8, "VGN1b8"),
    (adata_VGN1c2, "VGN1c2"),
    (adata_VGN1c3, "VGN1c3"),
    (adata_VGN1e1, "VGN1e1"),
    (adata_VGN1e9, "VGN1e9")
]

# Cluster assignment per cell, re-keyed to the bare cell ID (the part before
# the '-') so it can be merged with the per-sample expression matrices.
clusters_df = adata_spatial.obs['clusters'].to_frame()
clusters_df.index = clusters_df.index.map(lambda cell_id: cell_id.split('-')[0])

# Per-sample genes-by-clusters mean-expression tables, keyed by sample label.
dataframes = {}

# extracting normalised expression counts
def process_adata(adata, clusters_df, df_name, store=None):
    """Average the normalised expression of every gene per cluster for one sample.

    Parameters
    ----------
    adata : AnnData-like object exposing .X, .obs_names and .var_names
        One sample's filtered/normalised expression matrix (cells x genes).
    clusters_df : pd.DataFrame
        Indexed by bare cell ID, with a 'clusters' column of cluster labels.
    df_name : str
        Sample label; also the key under which the result is stored.
    store : dict, optional
        Destination mapping for the result. Defaults to the module-level
        `dataframes` dict, preserving the original global-mutating behaviour.

    Returns
    -------
    pd.DataFrame
        Genes x clusters table of mean expression (also placed in `store`).
    """
    if store is None:
        store = dataframes  # backward compatible: fill the shared dict

    # Convert sparse matrix to dense if necessary
    if isinstance(adata.X, scipy.sparse.spmatrix):
        X_dense = adata.X.toarray()
    else:
        X_dense = adata.X

    # Cells x genes expression table
    gene_expression_matrix = pd.DataFrame(data=X_dense, index=adata.obs_names, columns=adata.var_names)

    # Attach cluster labels by cell ID (inner join: cells missing from
    # clusters_df are dropped)
    merged_df = pd.merge(gene_expression_matrix, clusters_df, left_index=True, right_index=True)

    # Mean expression per cluster, transposed to genes x clusters
    cluster_counts = merged_df.groupby('clusters').mean().transpose()

    # Mask clusters represented by too few cells in these samples with NaN
    if df_name in ['VGN1e1', 'VGN1e9']:
        cluster_counts.loc[:, ['7', '5', '15', '8']] = np.nan
    elif df_name in ['VGN1b6', 'VGN1b8']:
        cluster_counts.loc[:, ['7', '15']] = np.nan
    elif df_name in ['VGN1a4', 'VGN1a6']:
        cluster_counts.loc[:, ['17']] = np.nan

    store[df_name] = cluster_counts
    return cluster_counts

# Process each adata object and store the results in dataframes dictionary
# (keyed by sample label; each value is a genes x clusters mean-expression table)
for adata, df_name in adatas:
    process_adata(adata, clusters_df, df_name)

# Access the DataFrames using the keys
# NOTE(review): these variables reuse the sample IDs as names, shadowing
# nothing but easy to confuse with the adata_* objects.
VGN1a6 = dataframes['VGN1a6']
VGN1a4 = dataframes['VGN1a4']
VGN1b6 = dataframes['VGN1b6']
VGN1b8 = dataframes['VGN1b8']
VGN1c2 = dataframes['VGN1c2']
VGN1c3 = dataframes['VGN1c3']
VGN1e1 = dataframes['VGN1e1']
VGN1e9 = dataframes['VGN1e9']
In [14]:
# save results of rank gene groups
result = adata_spatial.uns['rank_genes_groups']

# Cluster names are stored as the fields of the structured 'names' array.
cluster_names = result['names'].dtype.names

# Collect one record per (gene, cluster) pair with its logistic-regression score.
records = []
for cluster in cluster_names:
    for gene, score in zip(result['names'][cluster], result['scores'][cluster]):
        records.append({'gene': gene, 'score': score, 'cluster': cluster})

# Long-format table: gene / score / cluster
markergene_df = pd.DataFrame(records)

# Wide format: one row per gene, one enrichment-score column per cluster.
restructured_df = markergene_df.pivot(
    index='gene',
    columns='cluster',
    values='score'
)

# Rename columns to include '_EnrichmentScore' suffix
restructured_df.columns = [f"{cluster}_EnrichmentScore" for cluster in restructured_df.columns]

# Make 'gene' a regular column again
restructured_df = restructured_df.reset_index()

# Order the cluster columns numerically (0, 1, ..., 17) rather than lexically.
enrichment_cols = sorted(
    [col for col in restructured_df.columns if '_EnrichmentScore' in col],
    key=lambda col: int(col.split('_')[0])
)

# Reorder the dataframe columns: gene first, then the sorted scores
restructured_df = restructured_df[['gene'] + enrichment_cols]

restructured_df
Out[14]:
gene 0_EnrichmentScore 1_EnrichmentScore 2_EnrichmentScore 3_EnrichmentScore 4_EnrichmentScore 5_EnrichmentScore 6_EnrichmentScore 7_EnrichmentScore 8_EnrichmentScore 9_EnrichmentScore 10_EnrichmentScore 11_EnrichmentScore 12_EnrichmentScore 13_EnrichmentScore 14_EnrichmentScore 15_EnrichmentScore 16_EnrichmentScore 17_EnrichmentScore
0 TraesCS1A02G052000 -1.899248 -0.265776 -1.618058 -0.018310 0.812011 -1.089552 -0.354154 -1.914170 -0.421712 -0.826297 -1.008212 -0.002280 1.397553 -0.645299 1.715561 2.230191 0.119648 3.788102
1 TraesCS1A02G077800 -3.895693 -0.690553 -2.864961 -1.759157 -0.803881 -1.694246 5.860512 3.174781 -1.542214 0.982594 0.605621 -1.146341 -1.382980 5.235276 -0.493602 -0.419315 0.979379 -0.145220
2 TraesCS1A02G154900 -0.840187 -0.257462 -0.157702 2.576114 0.632968 -0.774375 -0.272984 -0.132877 -0.479977 1.849547 -0.442009 0.202528 -0.909859 -0.269406 -0.000572 -0.330262 -0.221848 -0.171638
3 TraesCS1A02G156100 -2.038370 -0.190227 -0.412845 -0.868299 0.177898 1.730197 0.238617 0.860592 -0.565050 -0.797859 0.608742 2.955381 -0.748560 -1.093605 1.253467 -0.466003 -0.414292 -0.229786
4 TraesCS1A02G199600 2.371270 -0.677488 2.211071 -2.370305 -1.328793 1.440920 -0.629660 0.245045 -0.228128 -0.972547 -0.781369 2.029766 0.734890 1.045224 -0.880091 -1.509792 -0.723550 0.023538
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
195 TraesCS7D02G342300 -1.323906 -0.532578 -1.139863 -0.538103 3.260145 -1.136548 -0.870831 0.567301 -0.666863 -0.320511 -0.409740 2.506989 -1.027976 -0.739799 1.096642 1.756481 -0.010124 -0.470718
196 TraesCS7D02G379200 -0.698107 -0.069254 -0.706524 -0.848616 -0.062484 2.485972 -0.639519 -0.449304 0.007441 -0.439396 0.193667 2.389086 0.058641 -0.327640 0.217816 -0.309713 -0.305536 -0.496528
197 TraesCS7D02G388600 -0.359088 0.125317 -0.269723 1.336437 0.737247 -0.095077 -0.761950 -0.204690 0.696411 0.055025 -0.285617 -0.173296 -0.391878 0.197854 -0.350791 -0.328775 0.035662 0.036933
198 TraesCS7D02G521200 0.681640 -0.897675 -1.102229 -0.981968 -1.312759 -0.640440 2.357756 0.062453 0.057815 -0.553182 -0.377786 -0.071443 0.849560 0.140697 -0.332130 0.104542 0.190743 1.824404
199 TraesCSU02G093200 0.042112 -0.337133 0.148754 -0.008615 0.248737 0.388551 -0.155734 -0.681636 0.523509 0.307238 -0.293516 -0.272808 -0.033067 -0.092026 -0.127859 0.111299 0.130460 0.101733

200 rows × 19 columns

In [15]:
# Ensure 'gene' is the index in the restructured dataframe for alignment.
# Fix: guarded and without inplace=True so this cell is idempotent — the
# original set_index(..., inplace=True) raised KeyError on a second run and
# silently mutated the frame displayed in the previous cell.
if 'gene' in restructured_df.columns:
    restructured_df = restructured_df.set_index('gene')

# Initialize the base dataframe with enrichment scores
enrichmentscore_normexp_combined_df = restructured_df.copy()

# Iterate over the sample-specific dataframes in the dictionary
for sample, sample_df in dataframes.items():
    # Reset index to bring the gene names into a column
    sample_df = sample_df.reset_index()

    # Align the sample dataframe with the restructured dataframe on 'gene'
    sample_df = sample_df.set_index('index')  # Use 'index' (former gene names) as the new index
    sample_df = sample_df.rename_axis('gene')  # Rename the index to 'gene'

    # Add sample-specific prefixes to columns (e.g. 'VGN1e1_0')
    sample_df = sample_df.rename(columns=lambda col: f"{sample}_{col}")

    # Merge the sample data with the combined dataframe
    enrichmentscore_normexp_combined_df = enrichmentscore_normexp_combined_df.join(sample_df, how='left')

# Reset the index to make 'gene' a regular column
enrichmentscore_normexp_combined_df.reset_index(inplace=True)

# Desired sample order (developmental stage, WT before VRT-A2b)
sample_order = ['VGN1e1', 'VGN1e9', 'VGN1b6', 'VGN1b8', 'VGN1a6', 'VGN1a4', 'VGN1c2', 'VGN1c3']

# Get the list of expression columns to order
expression_columns = [col for col in enrichmentscore_normexp_combined_df.columns if any(sample in col for sample in sample_order)]

# Sort the expression columns by the desired sample order
ordered_expression_columns = sorted(expression_columns, key=lambda col: sample_order.index(col.split('_')[0]))

# Keep non-expression columns (e.g., 'gene', enrichment scores) in the beginning
non_expression_columns = [col for col in enrichmentscore_normexp_combined_df.columns if col not in expression_columns]

# Reorder the dataframe columns
enrichmentscore_normexp_combined_df = enrichmentscore_normexp_combined_df[non_expression_columns + ordered_expression_columns]

# Inspect the final combined dataframe
enrichmentscore_normexp_combined_df
Out[15]:
gene 0_EnrichmentScore 1_EnrichmentScore 2_EnrichmentScore 3_EnrichmentScore 4_EnrichmentScore 5_EnrichmentScore 6_EnrichmentScore 7_EnrichmentScore 8_EnrichmentScore ... VGN1c3_8 VGN1c3_9 VGN1c3_10 VGN1c3_11 VGN1c3_12 VGN1c3_13 VGN1c3_14 VGN1c3_15 VGN1c3_16 VGN1c3_17
0 TraesCS1A02G052000 -1.899248 -0.265776 -1.618058 -0.018310 0.812011 -1.089552 -0.354154 -1.914170 -0.421712 ... 0.016740 0.027439 0.045747 0.047847 0.017383 0.021586 0.353534 0.229726 0.063127 0.372287
1 TraesCS1A02G077800 -3.895693 -0.690553 -2.864961 -1.759157 -0.803881 -1.694246 5.860512 3.174781 -1.542214 ... 0.141597 0.403543 0.189756 0.024777 0.149514 0.996244 0.011831 0.059267 0.131254 0.064863
2 TraesCS1A02G154900 -0.840187 -0.257462 -0.157702 2.576114 0.632968 -0.774375 -0.272984 -0.132877 -0.479977 ... 0.005315 0.051778 0.005273 0.015205 0.055854 0.017627 0.015629 0.004341 0.013033 0.009766
3 TraesCS1A02G156100 -2.038370 -0.190227 -0.412845 -0.868299 0.177898 1.730197 0.238617 0.860592 -0.565050 ... 0.058267 0.040335 0.102397 0.302900 0.172868 0.087936 0.159085 0.060469 0.041710 0.053610
4 TraesCS1A02G199600 2.371270 -0.677488 2.211071 -2.370305 -1.328793 1.440920 -0.629660 0.245045 -0.228128 ... 0.090973 0.028937 0.025283 0.181941 0.220940 0.138797 0.030069 0.029664 0.031233 0.050177
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
195 TraesCS7D02G342300 -1.323906 -0.532578 -1.139863 -0.538103 3.260145 -1.136548 -0.870831 0.567301 -0.666863 ... 0.009702 0.002303 0.090308 0.229741 0.000000 0.010370 0.180497 0.195012 0.085941 0.124137
196 TraesCS7D02G379200 -0.698107 -0.069254 -0.706524 -0.848616 -0.062484 2.485972 -0.639519 -0.449304 0.007441 ... 0.026119 0.011584 0.051343 0.189548 0.000000 0.023589 0.042417 0.010876 0.043627 0.009813
197 TraesCS7D02G388600 -0.359088 0.125317 -0.269723 1.336437 0.737247 -0.095077 -0.761950 -0.204690 0.696411 ... 0.084736 0.091440 0.049619 0.095866 0.133632 0.061324 0.048247 0.062308 0.032471 0.053517
198 TraesCS7D02G521200 0.681640 -0.897675 -1.102229 -0.981968 -1.312759 -0.640440 2.357756 0.062453 0.057815 ... 0.032484 0.015165 0.095812 0.096225 0.020511 0.034811 0.162881 0.227820 0.138573 0.389201
199 TraesCSU02G093200 0.042112 -0.337133 0.148754 -0.008615 0.248737 0.388551 -0.155734 -0.681636 0.523509 ... 0.051506 0.057186 0.048762 0.034607 0.000000 0.078886 0.035634 0.060346 0.055439 0.055034

200 rows × 163 columns

In [19]:
# Work on a copy so the combined table keeps its original sample-ID column names.
enrichmentscore_normexp_combined_df2 = enrichmentscore_normexp_combined_df.copy()

# Define the mapping of replacements
# (sample ID -> stage/genotype label: LDR = late double ridge, LP = lemma
#  primordia, TS = terminal spikelet, CER = carpel extension round)
replacement_map = {
    'VGN1e1': 'LDR_WT_cluster',
    'VGN1e9': 'LDR_VRT-A2b_cluster',
    'VGN1b6': 'LP_WT_cluster',
    'VGN1b8': 'LP_VRT-A2b_cluster',
    'VGN1a6': 'TS_WT_cluster',
    'VGN1a4': 'TS_VRT-A2b_cluster',
    'VGN1c2': 'CER_WT_cluster',
    'VGN1c3': 'CER_VRT-A2b_cluster'
}

# Rename columns using the mapping
def rename_columns(col_name, mapping=None):
    """Replace a sample-ID prefix in a column name with its stage/genotype label.

    Parameters
    ----------
    col_name : str
        Column name, e.g. 'VGN1e1_3'.
    mapping : dict, optional
        prefix -> replacement mapping; defaults to the module-level
        `replacement_map` (backward compatible with the original behaviour).

    Returns
    -------
    str
        Renamed column, or `col_name` unchanged if no prefix matches.
    """
    if mapping is None:
        mapping = replacement_map
    for old_value, new_value in mapping.items():
        if col_name.startswith(old_value):
            return col_name.replace(old_value, new_value)
    return col_name

enrichmentscore_normexp_combined_df2.rename(columns=rename_columns, inplace=True)

enrichmentscore_normexp_combined_df2

enrichmentscore_normexp_combined_df2.to_csv("enrichmentanalysis/LogRegScore_NormExpAvg_allsamples.csv", index=False)

Now I want to select the top marker genes for each cluster and put these in a table¶

First I need to select what the cut-off is for enriched genes, so I will look at the distribution of scores.

Then I will determine the point at which I consider a gene enriched, using a +2 SD (standard deviation) cut-off.

In [155]:
# Global statistics of the logistic-regression enrichment scores
mean_score = markergene_df['score'].mean()
std_score = markergene_df['score'].std()

# A gene counts as significantly enriched above mean + 2 standard deviations
cutoff = mean_score + 2 * std_score

print(f"Mean: {mean_score}, Standard Deviation: {std_score}")
print(f"Cutoff for significantly enriched scores: {cutoff}")

# Rows clearing the global threshold
significant_scores_df = markergene_df.loc[markergene_df['score'] > cutoff]

print(significant_scores_df)

# Histogram of all scores with the +2 SD threshold marked
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(markergene_df['score'], bins=50, color='skyblue', edgecolor='black', alpha=0.7)
ax.axvline(cutoff, color='red', linestyle='--', linewidth=2, label=f'Cutoff (+2 SD = {cutoff:.2f})')
ax.set_title('Frequency Distribution of Scores', fontsize=16)
ax.set_xlabel('Score', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
ax.legend(fontsize=12)
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
Mean: 0.0, Standard Deviation: 1.8383444547653198
Cutoff for significantly enriched scores: 3.6766889095306396
                    gene      score cluster
0     TraesCS4A02G256700   9.094888       0
1     TraesCS4B02G064000   6.216168       0
2     TraesCS3D02G284200   4.588726       0
200   TraesCS4D02G296400  13.337225       1
201   TraesCS6D02G220400   8.891061       1
...                  ...        ...     ...
3403  TraesCS7D02G261600   6.040273      17
3404  TraesCS1A02G264300   5.881382      17
3405  TraesCS6D02G220400   4.888692      17
3406  TraesCS2B02G464200   3.843694      17
3407  TraesCS1A02G052000   3.788102      17

[114 rows x 3 columns]
No description has been provided for this image

So for all clusters, the +2 STD cut off is 3.68. I will calculate this for each cluster individually to see if this is very variable by cluster.

In [156]:
# How much does the +2 SD threshold vary from cluster to cluster?
cluster_results = {}  # per-cluster stats and enriched rows

for cluster, scores in markergene_df.groupby('cluster'):
    # Cluster-specific mean/SD and the resulting enrichment threshold
    cluster_mean = scores['score'].mean()
    cluster_std = scores['score'].std()
    cluster_cutoff = cluster_mean + 2 * cluster_std

    # Rows of this cluster clearing its own threshold
    enriched = scores[scores['score'] > cluster_cutoff]

    cluster_results[cluster] = {
        'mean': cluster_mean,
        'std': cluster_std,
        'cutoff': cluster_cutoff,
        'significant_scores': enriched,
    }

    # Per-cluster summary
    print(f"Cluster: {cluster}")
    print(f"  Mean: {cluster_mean:.2f}")
    print(f"  Standard Deviation: {cluster_std:.2f}")
    print(f"  Cutoff (+2 SD): {cluster_cutoff:.2f}")
    print(f"  Significant Scores Count: {len(enriched)}")
    print()

# Combine all significant scores into a single DataFrame (optional)
all_significant_scores = pd.concat([res['significant_scores'] for res in cluster_results.values()])
Cluster: 0
  Mean: -0.32
  Standard Deviation: 1.92
  Cutoff (+2 SD): 3.51
  Significant Scores Count: 3

Cluster: 1
  Mean: -0.11
  Standard Deviation: 2.04
  Cutoff (+2 SD): 3.98
  Significant Scores Count: 6

Cluster: 10
  Mean: 0.03
  Standard Deviation: 1.54
  Cutoff (+2 SD): 3.12
  Significant Scores Count: 9

Cluster: 11
  Mean: -0.13
  Standard Deviation: 1.80
  Cutoff (+2 SD): 3.46
  Significant Scores Count: 5

Cluster: 12
  Mean: 0.12
  Standard Deviation: 1.92
  Cutoff (+2 SD): 3.96
  Significant Scores Count: 6

Cluster: 13
  Mean: 0.14
  Standard Deviation: 1.71
  Cutoff (+2 SD): 3.55
  Significant Scores Count: 6

Cluster: 14
  Mean: 0.08
  Standard Deviation: 1.56
  Cutoff (+2 SD): 3.20
  Significant Scores Count: 10

Cluster: 15
  Mean: 0.11
  Standard Deviation: 1.92
  Cutoff (+2 SD): 3.96
  Significant Scores Count: 6

Cluster: 16
  Mean: 0.20
  Standard Deviation: 1.76
  Cutoff (+2 SD): 3.72
  Significant Scores Count: 7

Cluster: 17
  Mean: 0.11
  Standard Deviation: 1.54
  Cutoff (+2 SD): 3.20
  Significant Scores Count: 10

Cluster: 2
  Mean: -0.04
  Standard Deviation: 1.85
  Cutoff (+2 SD): 3.66
  Significant Scores Count: 5

Cluster: 3
  Mean: -0.22
  Standard Deviation: 1.98
  Cutoff (+2 SD): 3.74
  Significant Scores Count: 8

Cluster: 4
  Mean: -0.11
  Standard Deviation: 2.06
  Cutoff (+2 SD): 4.00
  Significant Scores Count: 7

Cluster: 5
  Mean: -0.05
  Standard Deviation: 1.87
  Cutoff (+2 SD): 3.70
  Significant Scores Count: 4

Cluster: 6
  Mean: -0.05
  Standard Deviation: 1.87
  Cutoff (+2 SD): 3.69
  Significant Scores Count: 8

Cluster: 7
  Mean: 0.17
  Standard Deviation: 1.95
  Cutoff (+2 SD): 4.07
  Significant Scores Count: 5

Cluster: 8
  Mean: 0.12
  Standard Deviation: 1.81
  Cutoff (+2 SD): 3.74
  Significant Scores Count: 3

Cluster: 9
  Mean: -0.03
  Standard Deviation: 1.84
  Cutoff (+2 SD): 3.65
  Significant Scores Count: 8

The cut-offs are not very variable: they range from 3.12 to 4.07. I will apply each cluster's own cut-off to get its 'top genes' and see how many markers this gives per cluster.

In [157]:
# Apply each cluster's own +2 SD cutoff (same thresholds as the summary cell
# above) and collect the enriched genes into one table.

# Filtered rows for each cluster, concatenated at the end
filtered_data = []

# Number of enriched genes retained per cluster
cluster_row_counts = {}

for cluster, group in markergene_df.groupby('cluster'):
    # Cluster-specific enrichment threshold: mean + 2 standard deviations
    mean_score = group['score'].mean()
    std_score = group['score'].std()
    cutoff = mean_score + 2 * std_score

    # Keep only genes scoring above this cluster's own cutoff
    filtered_group = group[group['score'] > cutoff]

    filtered_data.append(filtered_group)
    cluster_row_counts[cluster] = len(filtered_group)

# Combine all filtered groups into a single DataFrame
filtered_markergene_df = pd.concat(filtered_data)

# Reset index for clarity
filtered_markergene_df.reset_index(drop=True, inplace=True)

# Print the row counts for each cluster
print("Number of rows per cluster in the filtered DataFrame:")
for cluster, count in cluster_row_counts.items():
    print(f"Cluster {cluster}: {count} genes")

# Display the total number of rows (fixed message: was "Totalfiltered")
print(f"\nTotal filtered marker genes: {len(filtered_markergene_df)}")

# Display the filtered DataFrame
print(filtered_markergene_df)
Number of rows per cluster in the filtered DataFrame:
Cluster 0: 3 genes
Cluster 1: 6 genes
Cluster 10: 9 genes
Cluster 11: 5 genes
Cluster 12: 6 genes
Cluster 13: 6 genes
Cluster 14: 10 genes
Cluster 15: 6 genes
Cluster 16: 7 genes
Cluster 17: 10 genes
Cluster 2: 5 genes
Cluster 3: 8 genes
Cluster 4: 7 genes
Cluster 5: 4 genes
Cluster 6: 8 genes
Cluster 7: 5 genes
Cluster 8: 3 genes
Cluster 9: 8 genes

Totalfiltered marker genes: 116
                   gene      score cluster
0    TraesCS4A02G256700   9.094888       0
1    TraesCS4B02G064000   6.216168       0
2    TraesCS3D02G284200   4.588726       0
3    TraesCS4D02G296400  13.337225       1
4    TraesCS6D02G220400   8.891061       1
..                  ...        ...     ...
111  TraesCS7B02G413900   5.830931       9
112  TraesCS7A02G175200   4.953066       9
113  TraesCS5A02G356100   4.799537       9
114  TraesCS2B02G399800   4.534127       9
115  TraesCS5A02G473800   3.859297       9

[116 rows x 3 columns]

Now I want to annotate these markers with their names¶

This will be used for Supplementary Table H, with added annotations applied by hand (including literature and label name)

In [ ]:
# Curated gene annotations (Dec 2024); rows without an annotation are dropped
annotation = (
    pd.read_csv('annotated_names_12_24.csv')
    .dropna()
    .rename(columns={'gene_id': 'gene'})
)

# Only the columns needed for the merge
annotation_subset = annotation[['gene', 'annotated_name']]

# Left-join keeps every filtered marker gene, annotated or not;
# genes without an annotation get an empty string instead of NaN
filtered_markergene_df_annotated = (
    filtered_markergene_df
    .merge(annotation_subset, on='gene', how='left')
    .assign(annotated_name=lambda d: d['annotated_name'].fillna(''))
)

filtered_markergene_df_annotated
In [170]:
def _join_nonempty(values):
    """Comma-join entries, skipping blanks left by unannotated genes."""
    return ', '.join(v for v in values if v)

# One row per cluster: comma-separated gene IDs and annotated names
formatted_markergene_df = (
    filtered_markergene_df_annotated
    .drop(columns='score')
    .groupby('cluster')
    .agg({
        'gene': _join_nonempty,
        'annotated_name': _join_nonempty
    })
    .reset_index()
)

# Save for Supplementary Table H and display
formatted_markergene_df.to_csv("enrichmentanalysis/topenrichedgenes_domainnames.csv", index=False)
formatted_markergene_df
Out[170]:
cluster gene annotated_name
0 0 TraesCS4A02G256700, TraesCS4B02G064000, TraesC... TraesCS4A02G256700_TaKNOX5, TraesCS4B02G064000...
1 1 TraesCS4D02G296400, TraesCS6D02G220400, TraesC... TraesCS4D02G296400, TraesCS6D02G220400_TaYABBY...
2 10 TraesCS7A02G383800, TraesCS7D02G261600, TraesC... TraesCS7A02G383800_AP3_OsMADS16, TraesCS7D02G2...
3 11 TraesCS7D02G246100, TraesCS3D02G284200, TraesC... TraesCS7D02G246100_OsCUC3, TraesCS3D02G284200_...
4 12 TraesCS6A02G287300, TraesCS1A02G418200, TraesC... TraesCS6A02G287300_OsLEC1, TraesCS1A02G418200_...
5 13 TraesCS6D02G220400, TraesCS1D02G162600, TraesC... TraesCS6D02G220400_TaYABBY7D, TraesCS1D02G1626...
6 14 TraesCS4D02G296400, TraesCS1D02G197300, TraesC... TraesCS4D02G296400, TraesCS1D02G197300_OsROC3t...
7 15 TraesCS6A02G259000, TraesCS2B02G464200, TraesC... TraesCS6A02G259000_AGL6_OsMADS6, TraesCS2B02G4...
8 16 TraesCS6A02G259000, TraesCS1D02G127700, TraesC... TraesCS6A02G259000_AGL6_OsMADS6, TraesCS1D02G1...
9 17 TraesCS6A02G259000, TraesCS7A02G383800, TraesC... TraesCS6A02G259000_AGL6_OsMADS6, TraesCS7A02G3...
10 2 TraesCS6D02G220400, TraesCS2B02G403100, TraesC... TraesCS6D02G220400_TaYABBY7D, TraesCS2B02G4031...
11 3 TraesCS1B02G042200, TraesCS4A02G256700, TraesC... TraesCS1B02G042200_MT2, TraesCS4A02G256700_TaK...
12 4 TraesCS7A02G308400, TraesCS4D02G296400, TraesC... TraesCS7A02G308400_OsROC7t, TraesCS4D02G296400...
13 5 TraesCS3D02G284200, TraesCS1B02G042200, TraesC... TraesCS3D02G284200_OsMADS32, TraesCS1B02G04220...
14 6 TraesCS1B02G479300, TraesCS6A02G176400, TraesC... TraesCS1B02G479300, TraesCS6A02G176400_SRZ1, T...
15 7 TraesCS6A02G259000, TraesCS4A02G256700, TraesC... TraesCS6A02G259000_AGL6_OsMADS6, TraesCS4A02G2...
16 8 TraesCS4D02G245300, TraesCS2B02G403100, TraesC... TraesCS4D02G245300_TaYABBY4D, TraesCS2B02G4031...
17 9 TraesCS4B02G064000, TraesCS6A02G313800, TraesC... TraesCS4B02G064000_OsGIF3, TraesCS6A02G313800_...

Creating high quality UMAPs and spatial plots for publication¶

In [12]:
# Fixed palette so cluster colours stay consistent across every figure.
# Index i in the list is the colour for cluster label str(i).
_cluster_palette = [
    '#a70e62', '#e10e3c', '#f47140', '#f0b932', '#ffd95a', '#c2ed51',
    '#8ed73d', '#489c73', '#1a8d83', '#4915be', '#2357d5', '#fed2ff',
    '#6ecad5', '#bd08f9', '#886fc2', '#e170c0', '#ff33cc', '#e4a9e8',
]

custom_cluster_colors = {str(i): colour for i, colour in enumerate(_cluster_palette)}

# Fallback colour for any cluster label missing from the palette
default_color = 'white'
In [13]:
# Longer major tick marks on both axes for publication figures
plt.rcParams.update({'xtick.major.size': 10, 'ytick.major.size': 10})

mapping late double ridge WT¶

In [14]:
# Subset the integrated object to the late-double-ridge WT sample 'VGN1e1'
adata_VGN1e1 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1e1'].copy()

# Strip the "-<suffix>" appended to barcodes during dataset concatenation
adata_VGN1e1.obs.index = [barcode.split('-')[0] for barcode in adata_VGN1e1.obs.index]

# Expose the barcode index as an explicit 'cell_id' column
adata_VGN1e1_df = adata_VGN1e1.obs.reset_index().rename(columns={'index': 'cell_id'})

# Cellpose segmentation polygons for this region, in micron space
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1e_region1_output/cellpose2_micron_space_VGN1e1.parquet")

# Keep only polygons of high-quality, analysed cells:
# normalise ids on both sides to stripped strings before matching
adata_VGN1e1_df['cell_id'] = adata_VGN1e1_df['cell_id'].astype(str).str.strip()
cell_ids = [cell_id.strip() for cell_id in adata_VGN1e1_df['cell_id'].tolist()]

filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Attach each cell's cluster assignment to its polygons.
# NOTE(review): adata_VGN1e1_df is rebound here to a different table
# (clusters only); kept as-is to preserve downstream notebook state.
adata_VGN1e1_df = adata_VGN1e1.obs[['clusters']].copy()
adata_VGN1e1_df.index = adata_VGN1e1_df.index.astype(str)
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1e1_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
Out[14]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
48334 48334 2343479300017100075 1 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 3.0 None 6
48829 48829 2343479300017100075 6 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 10.5 None 6
48664 48664 2343479300017100075 0 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 1.5 None 6
48499 48499 2343479300017100075 5 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 9.0 None 6
48004 48004 2343479300017100075 2 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 4.5 None 6
... ... ... ... ... ... ... ... ... ... ...
28175 28175 2343479300098100014 2 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 4.5 None 12
28195 28195 2343479300098100014 0 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 1.5 None 12
28200 28200 2343479300098100014 6 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 10.5 None 12
28185 28185 2343479300098100014 1 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 3.0 None 12
28170 28170 2343479300098100014 3 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 6.0 None 12

15463 rows × 10 columns

In [15]:
# Find the sample's overall bounding box so the plot axes can be framed tightly

# Running extremes over all geometry bounding boxes
min_x = min_y = float('inf')
max_x = max_y = float('-inf')

for geom in filtered_segmentation_df['Geometry']:
    if not isinstance(geom, (Polygon, MultiPolygon)):
        continue
    # shapely .bounds -> (min_x, min_y, max_x, max_y)
    gx0, gy0, gx1, gy1 = geom.bounds
    min_x = min(min_x, gx0)
    min_y = min(min_y, gy0)
    max_x = max(max_x, gx1)
    max_y = max(max_y, gy1)

# Round to whole microns
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)

print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 3373, Max X: 4451
Min Y: 6363, Max Y: 7851
In [16]:
fig, ax = plt.subplots()
patches_list = []

def _polygon_patch(poly, facecolor):
    """Matplotlib patch for one shapely polygon exterior, thin white outline."""
    return patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=facecolor, edgecolor='white', linewidth=0.1)

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    # Cluster labels are keyed as strings in the colour map
    color = custom_cluster_colors.get(str(row['clusters']), default_color)

    if isinstance(shape, MultiPolygon):
        patches_list.extend(_polygon_patch(poly, color) for poly in shape.geoms)
    elif isinstance(shape, Polygon):
        patches_list.append(_polygon_patch(shape, color))

# Frame the sample; equal aspect keeps microns square
ax.set_xlim([3200, 4500])
ax.set_ylim([6400, 7800])
ax.set_aspect('equal')

# One collection renders all cell polygons efficiently
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)
plt.show()
No description has been provided for this image
In [22]:
# NOTE(review): these re-imports duplicate the notebook's top import cell;
# kept so this cell also runs standalone.
import numpy as np
from shapely.geometry import Polygon, MultiPolygon
from shapely.affinity import affine_transform
from matplotlib.lines import Line2D

# Keep a pristine copy of the geometries so re-running this cell always
# rotates from the originals (idempotent under repeated execution)
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Rotation angle in degrees (counter-clockwise for the default y-up axes)
rotation_angle = 20
angle_rad = np.radians(rotation_angle)

# Rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Rotate about the centre of the sample's bounding box
centroid_x = (min_x + max_x) / 2  # from the bbox cell above
centroid_y = (min_y + max_y) / 2  # from the bbox cell above

# shapely affine_transform takes [a, b, d, e, xoff, yoff] meaning
#   x' = a*x + b*y + xoff,  y' = d*x + e*y + yoff.
# The offsets below expand x' = cosθ(x−cx) − sinθ(y−cy) + cx (and the
# matching y' form), i.e. a pure rotation about (cx, cy).
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the preserved originals, never already-rotated geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))

# Replace the working geometry column with the rotated shapes
filtered_segmentation_df['Geometry'] = rotated_geometries

# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # colour map is keyed by string labels
    
    # Cluster colour, white for anything not in the palette
    color = custom_cluster_colors.get(cluster, default_color)

    # linewidth=0.0: no outlines in the publication figure
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
            patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
        patches_list.append(patch)

# Axis limits chosen by eye for the rotated sample (differ from the
# pre-rotation bbox printed above)
ax.set_xlim([3400, 4500])
ax.set_ylim([6200, 8000])
ax.set_aspect('equal')  # keep microns square

# One collection renders all cell polygons efficiently
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Scale bar: data units are microns, so length in units == length in µm
scale_bar_length_microns = 500  # length of scale bar in microns
scale_bar_x_start = 3700        # x-coordinate of the scale bar's left end
scale_bar_y_start = 6300        # y-coordinate of the scale bar
scale_bar_length_units = scale_bar_length_microns  # 1 data unit = 1 micron

# Draw the bar as a plain black line
scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

# Centred "500 µm" label just below the bar
ax.text(
    scale_bar_x_start + scale_bar_length_units / 2,
    scale_bar_y_start - 20,  # nudge the label below the bar
    f"{scale_bar_length_microns} µm",
    color="black",
    ha="center",
    va="top",
    fontsize=8
)

# High-resolution export for the figure panel
plt.savefig('leidenclustering/VGN1e1_domainmap_500scalebar.png', bbox_inches='tight', dpi=700, format='png', transparent=False)
plt.show()
No description has been provided for this image
In [19]:
### mapping late double ridge MUT
In [24]:
# Subset the integrated object to the late-double-ridge mutant sample 'VGN1e9'
adata_VGN1e9 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1e9'].copy()

# Strip the "-<suffix>" appended to barcodes during dataset concatenation
adata_VGN1e9.obs.index = [barcode.split('-')[0] for barcode in adata_VGN1e9.obs.index]

# Expose the barcode index as an explicit 'cell_id' column
adata_VGN1e9_df = adata_VGN1e9.obs.reset_index().rename(columns={'index': 'cell_id'})

# Cellpose segmentation polygons for this region, in micron space
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1e_region9_output/cellpose2_micron_space_VGN1e9.parquet")

# Keep only polygons of high-quality, analysed cells:
# normalise ids on both sides to stripped strings before matching
adata_VGN1e9_df['cell_id'] = adata_VGN1e9_df['cell_id'].astype(str).str.strip()
cell_ids = [cell_id.strip() for cell_id in adata_VGN1e9_df['cell_id'].tolist()]

filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Attach each cell's cluster assignment to its polygons.
# NOTE(review): adata_VGN1e9_df is rebound here to a different table
# (clusters only); kept as-is to preserve downstream notebook state.
adata_VGN1e9_df = adata_VGN1e9.obs[['clusters']].copy()
adata_VGN1e9_df.index = adata_VGN1e9_df.index.astype(str)
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1e9_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
Out[24]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
56991 56991 2655507000046100147 3 MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... None None cell 6.0 None 6
57911 57911 2655507000046100147 5 MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... None None cell 9.0 None 6
57451 57451 2655507000046100147 4 MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... None None cell 7.5 None 6
58141 58141 2655507000046100147 0 MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... None None cell 1.5 None 6
57681 57681 2655507000046100147 1 MULTIPOLYGON (((6828.874 8057.041, 6828.979 80... None None cell 3.0 None 6
... ... ... ... ... ... ... ... ... ... ...
15423 15423 2655507000138100178 5 MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... None None cell 9.0 None 0
15163 15163 2655507000138100178 3 MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... None None cell 6.0 None 0
15358 15358 2655507000138100178 1 MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... None None cell 3.0 None 0
15553 15553 2655507000138100178 6 MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... None None cell 10.5 None 0
15488 15488 2655507000138100178 0 MULTIPOLYGON (((7305.041 9536.552, 7308.216 95... None None cell 1.5 None 0

10983 rows × 10 columns

In [25]:
# I need to look into the region min and max values so I know where to set the axes of graph

# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')

# Iterate through the geometries
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    
    if isinstance(shape, (Polygon, MultiPolygon)):
        # Get the bounding box of the shape
        bounds = shape.bounds  # Returns (min_x, min_y, max_x, max_y)
        min_x = min(min_x, bounds[0])
        min_y = min(min_y, bounds[1])
        max_x = max(max_x, bounds[2])
        max_y = max(max_y, bounds[3])

# Round to the nearest 1's place
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)

# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 6525, Max X: 7400
Min Y: 7978, Max Y: 9548
In [26]:
fig, ax = plt.subplots()
patches_list = []

for row in filtered_segmentation_df.itertuples():
    geom = row.Geometry
    # Cluster labels are keyed as strings in the colour map
    facecolor = custom_cluster_colors.get(str(row.clusters), default_color)

    # Normalise to a list of plain polygons; skip non-polygon geometries
    if isinstance(geom, MultiPolygon):
        polys = list(geom.geoms)
    elif isinstance(geom, Polygon):
        polys = [geom]
    else:
        polys = []

    for poly in polys:
        patches_list.append(
            patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=facecolor, edgecolor='white', linewidth=0.1)
        )

# Frame the sample; equal aspect keeps microns square
ax.set_xlim([6700, 7500])
ax.set_ylim([8000, 9600])
ax.set_aspect('equal')

# One collection renders all cell polygons efficiently
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

plt.show()
No description has been provided for this image
In [27]:
# Keep a pristine copy of the geometries so re-running this cell always
# rotates from the originals (idempotent under repeated execution)
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Rotation angle in degrees (counter-clockwise for the default y-up axes)
rotation_angle = 20
angle_rad = np.radians(rotation_angle)

# Rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Rotate about the centre of the sample's bounding box
centroid_x = (min_x + max_x) / 2  # from the bbox cell above
centroid_y = (min_y + max_y) / 2  # from the bbox cell above

# shapely affine_transform takes [a, b, d, e, xoff, yoff] meaning
#   x' = a*x + b*y + xoff,  y' = d*x + e*y + yoff.
# The offsets below expand x' = cosθ(x−cx) − sinθ(y−cy) + cx (and the
# matching y' form), i.e. a pure rotation about (cx, cy).
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the preserved originals, never already-rotated geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))

# Replace the working geometry column with the rotated shapes
filtered_segmentation_df['Geometry'] = rotated_geometries

# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # colour map is keyed by string labels
    
    # Cluster colour, white for anything not in the palette
    color = custom_cluster_colors.get(cluster, default_color)

    # linewidth=0.0: no outlines in the publication figure
    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
            patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
        patches_list.append(patch)

# Axis limits chosen by eye for the rotated sample (differ from the
# pre-rotation bbox printed above)
ax.set_xlim([6700, 7400])
ax.set_ylim([8200, 9700])
ax.set_aspect('equal')  # keep microns square

# One collection renders all cell polygons efficiently
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Scale bar: data units are microns, so length in units == length in µm
scale_bar_length_microns = 500  # length of scale bar in microns
scale_bar_x_start = 6720        # x-coordinate of the scale bar's left end
scale_bar_y_start = 8350       # y-coordinate of the scale bar
scale_bar_length_units = scale_bar_length_microns  # 1 data unit = 1 micron

# Draw the bar as a plain black line
scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

# Centred "500 µm" label just below the bar
ax.text(
    scale_bar_x_start + scale_bar_length_units / 2,
    scale_bar_y_start - 20,  # nudge the label below the bar
    f"{scale_bar_length_microns} µm",
    color="black",
    ha="center",
    va="top",
    fontsize=8
)

# High-resolution export for the figure panel
plt.savefig('leidenclustering/VGN1e9_domainmap_500scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=False)
plt.show()
No description has been provided for this image

mapping lemma primordia WT¶

In [28]:
# Subset the integrated object to the lemma-primordia WT sample 'VGN1b6'
adata_VGN1b6 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1b6'].copy()

# Strip the "-<suffix>" appended to barcodes during dataset concatenation
adata_VGN1b6.obs.index = [barcode.split('-')[0] for barcode in adata_VGN1b6.obs.index]

# Expose the barcode index as an explicit 'cell_id' column
adata_VGN1b6_df = adata_VGN1b6.obs.reset_index().rename(columns={'index': 'cell_id'})

# Cellpose segmentation polygons for this region, in micron space
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1b_region6_output/cellpose2_micron_space_VGN1b6.parquet")

# Keep only polygons of high-quality, analysed cells:
# normalise ids on both sides to stripped strings before matching
adata_VGN1b6_df['cell_id'] = adata_VGN1b6_df['cell_id'].astype(str).str.strip()
cell_ids = [cell_id.strip() for cell_id in adata_VGN1b6_df['cell_id'].tolist()]

filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Attach each cell's cluster assignment to its polygons.
# NOTE(review): adata_VGN1b6_df is rebound here to a different table
# (clusters only); kept as-is to preserve downstream notebook state.
adata_VGN1b6_df = adata_VGN1b6.obs[['clusters']].copy()
adata_VGN1b6_df.index = adata_VGN1b6_df.index.astype(str)
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1b6_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
Out[28]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
18274 18274 2293012600006100168 1 MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... None None cell 3.0 None 4
17778 17778 2293012600006100168 2 MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... None None cell 4.5 None 4
18770 18770 2293012600006100168 0 MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... None None cell 1.5 None 4
17530 17530 2293012600006100168 3 MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... None None cell 6.0 None 4
19018 19018 2293012600006100168 6 MULTIPOLYGON (((5555.024 4413.845, 5555.399 44... None None cell 10.5 None 4
... ... ... ... ... ... ... ... ... ... ...
62520 62520 2293012600036100142 6 MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... None None cell 10.5 None 9
61842 61842 2293012600036100142 3 MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... None None cell 6.0 None 9
61955 61955 2293012600036100142 2 MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... None None cell 4.5 None 9
62294 62294 2293012600036100142 5 MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... None None cell 9.0 None 9
62068 62068 2293012600036100142 4 MULTIPOLYGON (((5452.244 6030.988, 5452.287 60... None None cell 7.5 None 9

20272 rows × 10 columns

In [29]:
# Compute the bounding box of all cell geometries so the plot axes can be set.

# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')

# Iterate over the geometry column directly (no need for iterrows here)
for shape in filtered_segmentation_df['Geometry']:
    if isinstance(shape, (Polygon, MultiPolygon)):
        # shape.bounds returns (min_x, min_y, max_x, max_y)
        bounds = shape.bounds
        min_x = min(min_x, bounds[0])
        min_y = min(min_y, bounds[1])
        max_x = max(max_x, bounds[2])
        max_y = max(max_y, bounds[3])

# Guard: round(inf) raises a confusing OverflowError if no polygonal
# geometries survived filtering — fail with an explicit message instead.
if not math.isfinite(min_x):
    raise ValueError("No Polygon/MultiPolygon geometries found in filtered_segmentation_df")

# Round to the nearest 1's place for readable axis limits
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)

# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 5003, Max X: 5796
Min Y: 4333, Max Y: 6075
In [30]:
# Quick unrotated preview of the VGN1b6 region, coloured by cluster.
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # color map is keyed by string labels
    # Assign color based on the custom mapping, with a default fallback
    color = custom_cluster_colors.get(cluster, default_color)

    # Normalise Polygon vs MultiPolygon into one iterable so the
    # patch-building call is not duplicated (was copy-pasted per branch).
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        polygons = []  # skip any non-polygonal geometry

    for poly in polygons:
        patches_list.append(
            patches.Polygon(list(poly.exterior.coords), closed=True,
                            facecolor=color, edgecolor='white', linewidth=0.1)
        )

# Set axis limits (chosen from the bounds printed in the previous cell)
ax.set_xlim([4800, 5900])
ax.set_ylim([4200, 6200])
ax.set_aspect('equal')  # 1 data unit = 1 micron on both axes

# A single PatchCollection renders much faster than adding patches one by one
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

plt.show()
No description has been provided for this image
In [31]:
# Rotate the VGN1b6 sample 165° about its bounding-box centroid and save a
# domain map with a 500 µm scale bar.

# Preserve the original geometries so re-running this cell never rotates an
# already-rotated sample (rotation is always applied to the originals below).
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Define rotation angle in degrees
rotation_angle = 165
angle_rad = np.radians(rotation_angle)

# Rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Rotate about the centroid of the sample's bounding box
# (min_x/max_x/min_y/max_y were computed in the bounds cell above)
centroid_x = (min_x + max_x) / 2
centroid_y = (min_y + max_y) / 2

# Shapely affine_transform matrix [a, b, d, e, xoff, yoff]:
#   x' = a*x + b*y + xoff ; y' = d*x + e*y + yoff
# The offsets translate so the rotation is centred on (centroid_x, centroid_y).
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries and make the result the active geometry
filtered_segmentation_df['Geometry'] = filtered_segmentation_df['Original_Geometry'].apply(
    lambda geom: affine_transform(geom, rotation_matrix)
)

# Plot the rotated sample, coloured by cluster
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # color map is keyed by string labels
    color = custom_cluster_colors.get(cluster, default_color)

    # Single iterable for both geometry kinds (avoids the duplicated branch)
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        polygons = []

    for poly in polygons:
        patches_list.append(
            patches.Polygon(list(poly.exterior.coords), closed=True,
                            facecolor=color, edgecolor='white', linewidth=0.0)
        )

# Adjust axis limits as needed
ax.set_xlim([4800, 5900])
ax.set_ylim([4200, 6200])
ax.set_aspect('equal')  # 1 data unit = 1 micron on both axes

collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Scale bar: coordinates are in microns, so bar length in data units equals
# its physical length in µm.
scale_bar_length_microns = 500  # Length of scale bar in microns
scale_bar_x_start = 4830        # X-coordinate for the start of the scale bar
scale_bar_y_start = 4350        # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns  # 1 unit = 1 micron

scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

# Label centred horizontally under the bar
ax.text(
    scale_bar_x_start + scale_bar_length_units / 2,
    scale_bar_y_start - 20,  # place label just below the bar
    f"{scale_bar_length_microns} µm",
    color="black",
    ha="center",
    va="top",
    fontsize=8
)

plt.savefig('leidenclustering/VGN1b6_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png', transparent=False)
plt.show()
No description has been provided for this image

I also want to make a domain map for the specific ridges

In [33]:
# Domain map of the specific ridges: only clusters 1, 2 and 15 are highlighted,
# everything else falls back to default_color.
custom_cluster_colors2 = {
    '1': '#e10e3c',
    '2': '#f47140',
    '15': '#e170c0'
}

# Preserve the original geometries so re-running never rotates twice
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Define rotation angle in degrees (same orientation as the full domain map)
rotation_angle = 165
angle_rad = np.radians(rotation_angle)

# Rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Rotate about the centroid of the sample's bounding box
centroid_x = (min_x + max_x) / 2
centroid_y = (min_y + max_y) / 2

# Shapely affine_transform matrix [a, b, d, e, xoff, yoff]; offsets keep the
# rotation centred on (centroid_x, centroid_y).
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries and make the result the active geometry
filtered_segmentation_df['Geometry'] = filtered_segmentation_df['Original_Geometry'].apply(
    lambda geom: affine_transform(geom, rotation_matrix)
)

# Plot the rotated sample with the ridge-specific palette
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # color map is keyed by string labels
    color = custom_cluster_colors2.get(cluster, default_color)

    # Single iterable for both geometry kinds (avoids the duplicated branch)
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        polygons = []

    for poly in polygons:
        patches_list.append(
            patches.Polygon(list(poly.exterior.coords), closed=True,
                            facecolor=color, edgecolor='#595959', linewidth=0.5)
        )

# Zoomed-in window on the spikelet tissues
ax.set_xlim([5400, 5800])
ax.set_ylim([4800, 5290])
ax.set_aspect('equal')  # 1 data unit = 1 micron on both axes

collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# 100 µm scale bar (coordinates are in microns, so 1 unit = 1 µm)
scale_bar_length_microns = 100  # Length of scale bar in microns
scale_bar_x_start = 5410        # X-coordinate for the start of the scale bar
scale_bar_y_start = 4810        # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns

scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

plt.savefig('leidenclustering/VGN1b6_spikelettissues_100scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=False)
plt.show()
No description has been provided for this image

mapping lemma primordia MUT¶

In [34]:
# Filter to create a new AnnData object with only the sample 'VGN1b8'
adata_VGN1b8 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1b8'].copy()

# Drop the "-<suffix>" part of each obs index so it matches segmentation EntityIDs
adata_VGN1b8.obs.index = [index.split('-')[0] for index in adata_VGN1b8.obs.index]

# High-quality, analysed cell ids to keep (stripped strings, to match EntityID).
# Derived straight from the obs index; the original built an intermediate
# reset_index dataframe and stripped the ids twice.
cell_ids = adata_VGN1b8.obs.index.astype(str).str.strip().tolist()

# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1b_region8_output/cellpose2_micron_space_VGN1b8.parquet")

# Convert EntityID to string once so it can be matched against cell_ids
# (the original converted it a second, redundant time after filtering)
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)

# Keep only the segmentation rows belonging to analysed cells
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Merge cluster information into the segmentation dataframe
adata_VGN1b8_df = adata_VGN1b8.obs[['clusters']].copy()
adata_VGN1b8_df.index = adata_VGN1b8_df.index.astype(str)  # string index to match EntityID dtype
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1b8_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
Out[34]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
43190 43190 2654750700021100077 2 MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... None None cell 4.5 None 4
44086 44086 2654750700021100077 0 MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... None None cell 1.5 None 4
44310 44310 2654750700021100077 6 MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... None None cell 10.5 None 4
43414 43414 2654750700021100077 4 MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... None None cell 7.5 None 4
43638 43638 2654750700021100077 1 MULTIPOLYGON (((1641.195 6262.854, 1641.434 62... None None cell 3.0 None 4
... ... ... ... ... ... ... ... ... ... ...
16813 16813 2654750700061100059 0 MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... None None cell 1.5 None 9
16783 16783 2654750700061100059 5 MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... None None cell 9.0 None 9
16663 16663 2654750700061100059 3 MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... None None cell 6.0 None 9
16723 16723 2654750700061100059 4 MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... None None cell 7.5 None 9
16843 16843 2654750700061100059 6 MULTIPOLYGON (((1407.301 7467.331, 1407.942 74... None None cell 10.5 None 9

14749 rows × 10 columns

In [35]:
# Compute the bounding box of all cell geometries so the plot axes can be set.

# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')

# Iterate over the geometry column directly (no need for iterrows here)
for shape in filtered_segmentation_df['Geometry']:
    if isinstance(shape, (Polygon, MultiPolygon)):
        # shape.bounds returns (min_x, min_y, max_x, max_y)
        bounds = shape.bounds
        min_x = min(min_x, bounds[0])
        min_y = min(min_y, bounds[1])
        max_x = max(max_x, bounds[2])
        max_y = max(max_y, bounds[3])

# Guard: round(inf) raises a confusing OverflowError if no polygonal
# geometries survived filtering — fail with an explicit message instead.
if not math.isfinite(min_x):
    raise ValueError("No Polygon/MultiPolygon geometries found in filtered_segmentation_df")

# Round to the nearest 1's place for readable axis limits
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)

# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 884, Max X: 1809
Min Y: 6243, Max Y: 7527
In [36]:
# Quick unrotated preview of the VGN1b8 region, coloured by cluster.
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # color map is keyed by string labels
    # Assign color based on the custom mapping, with a default fallback
    color = custom_cluster_colors.get(cluster, default_color)

    # Normalise Polygon vs MultiPolygon into one iterable so the
    # patch-building call is not duplicated (was copy-pasted per branch).
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        polygons = []  # skip any non-polygonal geometry

    for poly in polygons:
        patches_list.append(
            patches.Polygon(list(poly.exterior.coords), closed=True,
                            facecolor=color, edgecolor='white', linewidth=0.1)
        )

# Set axis limits (chosen from the bounds printed in the previous cell)
ax.set_xlim([800, 1900])
ax.set_ylim([6100, 7600])
ax.set_aspect('equal')  # 1 data unit = 1 micron on both axes

# A single PatchCollection renders much faster than adding patches one by one
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

plt.show()
No description has been provided for this image
In [37]:
# Rotate the VGN1b8 sample 155° about its bounding-box centroid and save a
# domain map with a 500 µm scale bar.

# Preserve the original geometries so re-running this cell never rotates an
# already-rotated sample (rotation is always applied to the originals below).
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Define rotation angle in degrees
rotation_angle = 155
angle_rad = np.radians(rotation_angle)

# Rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Rotate about the centroid of the sample's bounding box
# (min_x/max_x/min_y/max_y were computed in the bounds cell above)
centroid_x = (min_x + max_x) / 2
centroid_y = (min_y + max_y) / 2

# Shapely affine_transform matrix [a, b, d, e, xoff, yoff]:
#   x' = a*x + b*y + xoff ; y' = d*x + e*y + yoff
# The offsets translate so the rotation is centred on (centroid_x, centroid_y).
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries and make the result the active geometry
filtered_segmentation_df['Geometry'] = filtered_segmentation_df['Original_Geometry'].apply(
    lambda geom: affine_transform(geom, rotation_matrix)
)

# Plot the rotated sample, coloured by cluster
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # color map is keyed by string labels
    color = custom_cluster_colors.get(cluster, default_color)

    # Single iterable for both geometry kinds (avoids the duplicated branch)
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        polygons = []

    for poly in polygons:
        patches_list.append(
            patches.Polygon(list(poly.exterior.coords), closed=True,
                            facecolor=color, edgecolor='white', linewidth=0.0)
        )

# Adjust axis limits as needed
ax.set_xlim([800, 1900])
ax.set_ylim([6100, 7800])
ax.set_aspect('equal')  # 1 data unit = 1 micron on both axes

collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Scale bar: coordinates are in microns, so 1 unit = 1 µm
scale_bar_length_microns = 500  # Length of scale bar in microns
scale_bar_x_start = 830         # X-coordinate for the start of the scale bar
scale_bar_y_start = 6250        # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns

scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

# Label centred horizontally under the bar
ax.text(
    scale_bar_x_start + scale_bar_length_units / 2,
    scale_bar_y_start - 20,  # place label just below the bar
    f"{scale_bar_length_microns} µm",
    color="black",
    ha="center",
    va="top",
    fontsize=8
)

# NOTE: transparent=True here (unlike the other domain maps) — kept as-is,
# presumably intentional for figure composition; confirm before changing.
plt.savefig('leidenclustering/VGN1b8_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png', transparent=True)
plt.show()
No description has been provided for this image

mapping terminal spikelet WT¶

In [38]:
# Filter to create a new AnnData object with only the sample 'VGN1a6'
# (the original comment wrongly said 'VGN1b8' — copy-paste leftover)
adata_VGN1a6 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1a6'].copy()

# Drop the "-<suffix>" part of each obs index so it matches segmentation EntityIDs
adata_VGN1a6.obs.index = [index.split('-')[0] for index in adata_VGN1a6.obs.index]

# High-quality, analysed cell ids to keep (stripped strings, to match EntityID).
# Derived straight from the obs index; the original built an intermediate
# reset_index dataframe and stripped the ids twice.
cell_ids = adata_VGN1a6.obs.index.astype(str).str.strip().tolist()

# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1a_region6_output/cellpose2_micron_space_VGN1a6.parquet")

# Convert EntityID to string once so it can be matched against cell_ids
# (the original converted it a second, redundant time after filtering)
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)

# Keep only the segmentation rows belonging to analysed cells
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Merge cluster information into the segmentation dataframe
adata_VGN1a6_df = adata_VGN1a6.obs[['clusters']].copy()
adata_VGN1a6_df.index = adata_VGN1a6_df.index.astype(str)  # string index to match EntityID dtype
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1a6_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
Out[38]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
14985 14985 2305551000002100247 6 MULTIPOLYGON (((740.840 5156.623, 742.057 5157... None None cell 10.5 None 0
14772 14772 2305551000002100247 0 MULTIPOLYGON (((740.840 5156.623, 742.057 5157... None None cell 1.5 None 0
14346 14346 2305551000002100247 1 MULTIPOLYGON (((740.840 5156.623, 742.057 5157... None None cell 3.0 None 0
13920 13920 2305551000002100247 2 MULTIPOLYGON (((740.840 5156.623, 742.057 5157... None None cell 4.5 None 0
14133 14133 2305551000002100247 4 MULTIPOLYGON (((740.840 5156.623, 742.057 5157... None None cell 7.5 None 0
... ... ... ... ... ... ... ... ... ... ...
12713 12713 2305551000055100230 2 MULTIPOLYGON (((311.576 7442.294, 316.129 7442... None None cell 4.5 None 1
12608 12608 2305551000055100230 3 MULTIPOLYGON (((311.576 7442.294, 316.129 7442... None None cell 6.0 None 1
13133 13133 2305551000055100230 0 MULTIPOLYGON (((311.576 7442.294, 316.129 7442... None None cell 1.5 None 1
13238 13238 2305551000055100230 6 MULTIPOLYGON (((311.576 7442.294, 316.129 7442... None None cell 10.5 None 1
13028 13028 2305551000055100230 5 MULTIPOLYGON (((311.576 7442.294, 316.129 7442... None None cell 9.0 None 1

37835 rows × 10 columns

In [39]:
# Compute the bounding box of all cell geometries so the plot axes can be set.

# Variables to track min and max x, y
min_x, min_y = float('inf'), float('inf')
max_x, max_y = float('-inf'), float('-inf')

# Iterate over the geometry column directly (no need for iterrows here)
for shape in filtered_segmentation_df['Geometry']:
    if isinstance(shape, (Polygon, MultiPolygon)):
        # shape.bounds returns (min_x, min_y, max_x, max_y)
        bounds = shape.bounds
        min_x = min(min_x, bounds[0])
        min_y = min(min_y, bounds[1])
        max_x = max(max_x, bounds[2])
        max_y = max(max_y, bounds[3])

# Guard: round(inf) raises a confusing OverflowError if no polygonal
# geometries survived filtering — fail with an explicit message instead.
if not math.isfinite(min_x):
    raise ValueError("No Polygon/MultiPolygon geometries found in filtered_segmentation_df")

# Round to the nearest 1's place for readable axis limits
min_x, min_y, max_x, max_y = round(min_x), round(min_y), round(max_x), round(max_y)

# Print the rounded min and max values for x and y
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 178, Max X: 1475
Min Y: 5150, Max Y: 7524
In [58]:
# Quick unrotated preview of the VGN1a6 region, coloured by cluster.
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # color map is keyed by string labels
    # Assign color based on the custom mapping, with a default fallback
    color = custom_cluster_colors.get(cluster, default_color)

    # Normalise Polygon vs MultiPolygon into one iterable so the
    # patch-building call is not duplicated (was copy-pasted per branch).
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        polygons = []  # skip any non-polygonal geometry

    for poly in polygons:
        patches_list.append(
            patches.Polygon(list(poly.exterior.coords), closed=True,
                            facecolor=color, edgecolor='white', linewidth=0.1)
        )

# Set axis limits (chosen from the bounds printed in the previous cell)
ax.set_xlim([100, 1550])
ax.set_ylim([5000, 7603])
ax.set_aspect('equal')  # 1 data unit = 1 micron on both axes

# A single PatchCollection renders much faster than adding patches one by one
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

plt.show()
No description has been provided for this image
In [40]:
# Rotate the VGN1a6 sample -17° about its bounding-box centroid and save a
# domain map with a 500 µm scale bar.

# Preserve the original geometries so re-running this cell never rotates an
# already-rotated sample (rotation is always applied to the originals below).
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Define rotation angle in degrees (negative = clockwise)
rotation_angle = -17
angle_rad = np.radians(rotation_angle)

# Rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Rotate about the centroid of the sample's bounding box
# (min_x/max_x/min_y/max_y were computed in the bounds cell above)
centroid_x = (min_x + max_x) / 2
centroid_y = (min_y + max_y) / 2

# Shapely affine_transform matrix [a, b, d, e, xoff, yoff]:
#   x' = a*x + b*y + xoff ; y' = d*x + e*y + yoff
# The offsets translate so the rotation is centred on (centroid_x, centroid_y).
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries and make the result the active geometry
filtered_segmentation_df['Geometry'] = filtered_segmentation_df['Original_Geometry'].apply(
    lambda geom: affine_transform(geom, rotation_matrix)
)

# Plot the rotated sample, coloured by cluster
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # color map is keyed by string labels
    color = custom_cluster_colors.get(cluster, default_color)

    # Single iterable for both geometry kinds (avoids the duplicated branch)
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        polygons = []

    for poly in polygons:
        patches_list.append(
            patches.Polygon(list(poly.exterior.coords), closed=True,
                            facecolor=color, edgecolor='white', linewidth=0.0)
        )

# Adjust axis limits as needed
ax.set_xlim([100, 1550])
ax.set_ylim([4800, 7750])
ax.set_aspect('equal')  # 1 data unit = 1 micron on both axes

collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Scale bar: coordinates are in microns, so 1 unit = 1 µm
scale_bar_length_microns = 500  # Length of scale bar in microns
scale_bar_x_start = 140         # X-coordinate for the start of the scale bar
scale_bar_y_start = 5100        # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns

scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

# Label centred horizontally under the bar
ax.text(
    scale_bar_x_start + scale_bar_length_units / 2,
    scale_bar_y_start - 20,  # place label just below the bar
    f"{scale_bar_length_microns} µm",
    color="black",
    ha="center",
    va="top",
    fontsize=8
)

plt.savefig('leidenclustering/VGN1a6_domainmap_500scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=False)
plt.show()
No description has been provided for this image

mapping terminal spikelet MUT¶

In [41]:
# Filter to create a new AnnData object with only the sample 'VGN1a4'
adata_VGN1a4 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1a4'].copy()

# Drop the "-<suffix>" part of each obs index so it matches segmentation EntityIDs
adata_VGN1a4.obs.index = [index.split('-')[0] for index in adata_VGN1a4.obs.index]

# High-quality, analysed cell ids to keep (stripped strings, to match EntityID).
# Derived straight from the obs index; the original built an intermediate
# reset_index dataframe (with a comment wrongly referencing 'VGN1e1') and
# stripped the ids twice.
cell_ids = adata_VGN1a4.obs.index.astype(str).str.strip().tolist()

# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1a_region4_output/cellpose2_micron_space_VGN1a4.parquet")

# Convert EntityID to string once so it can be matched against cell_ids
# (the original converted it a second, redundant time after filtering)
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)

# Keep only the segmentation rows belonging to analysed cells
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Merge cluster information into the segmentation dataframe
adata_VGN1a4_df = adata_VGN1a4.obs[['clusters']].copy()
adata_VGN1a4_df.index = adata_VGN1a4_df.index.astype(str)  # string index to match EntityID dtype
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1a4_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
Out[41]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
12211 12211 2655350200002100079 6 MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... None None cell 10.5 None 9
11707 11707 2655350200002100079 5 MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... None None cell 9.0 None 9
10951 10951 2655350200002100079 2 MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... None None cell 4.5 None 9
10699 10699 2655350200002100079 3 MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... None None cell 6.0 None 9
11203 11203 2655350200002100079 4 MULTIPOLYGON (((3014.803 5892.732, 3014.595 59... None None cell 7.5 None 9
... ... ... ... ... ... ... ... ... ... ...
10456 10456 2655350200055100164 1 MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... None None cell 3.0 None 16
10393 10393 2655350200055100164 4 MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... None None cell 7.5 None 16
10267 10267 2655350200055100164 3 MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... None None cell 6.0 None 16
10645 10645 2655350200055100164 6 MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... None None cell 10.5 None 16
10519 10519 2655350200055100164 5 MULTIPOLYGON (((2682.259 8226.024, 2682.387 82... None None cell 9.0 None 16

44695 rows × 10 columns

In [42]:
# Determine the spatial extent of the filtered region so sensible axis
# limits can be chosen for the plots below.

# Running extremes of the overall bounding box
min_x = min_y = float('inf')
max_x = max_y = float('-inf')

# Fold each (Multi)Polygon's bounding box into the running extremes
for shape in filtered_segmentation_df['Geometry']:
    if not isinstance(shape, (Polygon, MultiPolygon)):
        continue
    x0, y0, x1, y1 = shape.bounds  # (min_x, min_y, max_x, max_y)
    min_x, min_y = min(min_x, x0), min(min_y, y0)
    max_x, max_y = max(max_x, x1), max(max_y, y1)

# Round each extreme to the nearest integer
min_x, min_y = round(min_x), round(min_y)
max_x, max_y = round(max_x), round(max_y)

# Report the overall extent
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 2401, Max X: 3781
Min Y: 5887, Max Y: 8298
In [43]:
# Un-rotated preview of the VGN1a4 domain map: draw every cell polygon,
# coloured by its assigned cluster.
fig, ax = plt.subplots()
patches_list = []

def _cell_patch(poly, facecolor):
    """Build one matplotlib patch for a single shapely polygon exterior."""
    return patches.Polygon(
        list(poly.exterior.coords),
        closed=True,
        facecolor=facecolor,
        edgecolor='white',
        linewidth=0.1,
    )

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    # The colour map is keyed by string cluster labels; fall back to default
    fill = custom_cluster_colors.get(str(row['clusters']), default_color)

    if isinstance(shape, MultiPolygon):
        for part in shape.geoms:
            patches_list.append(_cell_patch(part, fill))
    elif isinstance(shape, Polygon):
        patches_list.append(_cell_patch(shape, fill))

# Frame the region (coordinates are in microns) and keep x/y to scale
ax.set_xlim([2300, 3800])
ax.set_ylim([5700, 8400])
ax.set_aspect('equal')

# A single PatchCollection renders faster than adding patches one by one
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

plt.show()
No description has been provided for this image
In [44]:
# Ensure the original geometries are preserved
# (rotation below always starts from Original_Geometry, so re-running this
# cell does not compound rotations)
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Define rotation angle in degrees
rotation_angle = -20
angle_rad = np.radians(rotation_angle)

# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2  # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2  # Use the previously computed min_y, max_y

# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff):
# maps each point p to centroid + R @ (p - centroid), i.e. a rotation about
# the bounding-box centre
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))

# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries

# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # Convert cluster to string to match custom colors dict keys
    
    # Assign color based on the custom mapping, with a default fallback
    color = custom_cluster_colors.get(cluster, default_color)

    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            # linewidth 0.0 hides the cell outlines in the publication figure
            patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
            patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
        patches_list.append(patch)

# Adjust axis limits as needed
ax.set_xlim([2300, 3800])
ax.set_ylim([5700, 8400])
ax.set_aspect('equal')

# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Add the scale bar (axis units are microns)
scale_bar_length_microns = 500  # Length of scale bar in microns
scale_bar_x_start = 2390        # X-coordinate for the start of the scale bar
scale_bar_y_start = 5900       # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns  # Assuming 1 unit = 1 micron

# Add scale bar as a line
scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

# Add label for scale bar
ax.text(
    scale_bar_x_start + scale_bar_length_units / 2,
    scale_bar_y_start - 20,  # Adjust to place label below the bar
    f"{scale_bar_length_microns} µm",
    color="black",
    ha="center",
    va="top",
    fontsize=8
)

# Save before plt.show() so the figure is written to disk
plt.savefig('leidenclustering/VGN1a4_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png', transparent=False)
plt.show()
No description has been provided for this image

Carpel extension round WT¶

In [45]:
# Filter to create a new AnnData object with only the sample 'VGN1c2'
adata_VGN1c2 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1c2'].copy()

# Strip the "-<suffix>" added when the datasets were concatenated so the obs
# index matches the raw EntityIDs used by the segmentation output
adata_VGN1c2.obs.index = [index.split('-')[0] for index in adata_VGN1c2.obs.index]

# Convert adata_VGN1c2.obs to a dataframe and reset the index to make the index a column
adata_VGN1c2_df = adata_VGN1c2.obs.reset_index()

# Rename the index column to 'cell_id'
adata_VGN1c2_df = adata_VGN1c2_df.rename(columns={'index': 'cell_id'})


# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1c_region2_output/cellpose2_micron_space_VGN1c2.parquet")

# Filter segmentation data to only include high-quality, analysed cells.
# List the cell ids we want to include (normalised to stripped strings once;
# the previous version stripped the same strings a second time redundantly).
adata_VGN1c2_df['cell_id'] = adata_VGN1c2_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1c2_df['cell_id'].tolist()

# Convert EntityID to strings to match cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)

# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Include cluster information in the dataframe
# (EntityID is already a string; the duplicate astype from the original cell was dropped)
adata_VGN1c2_df = adata_VGN1c2.obs[['clusters']].copy()
adata_VGN1c2_df.index = adata_VGN1c2_df.index.astype(str)  # String index is required for the index-based merge below
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1c2_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
Out[45]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
26701 26701 2293599400000100091 0 MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... None None cell 1.5 None 14
26885 26885 2293599400000100091 6 MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... None None cell 10.5 None 14
26517 26517 2293599400000100091 5 MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... None None cell 9.0 None 14
25965 25965 2293599400000100091 2 MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... None None cell 4.5 None 14
26333 26333 2293599400000100091 1 MULTIPOLYGON (((2147.053 2954.890, 2148.056 29... None None cell 3.0 None 14
... ... ... ... ... ... ... ... ... ... ...
222308 222308 2293599400212100353 4 MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... None None cell 7.5 None 4
222102 222102 2293599400212100353 2 MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... None None cell 4.5 None 4
222720 222720 2293599400212100353 5 MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... None None cell 9.0 None 4
221896 221896 2293599400212100353 3 MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... None None cell 6.0 None 4
222926 222926 2293599400212100353 0 MULTIPOLYGON (((5975.232 6093.677, 5975.280 60... None None cell 1.5 None 4

106295 rows × 10 columns

In [46]:
# Determine the spatial extent of the filtered region so sensible axis
# limits can be chosen for the plots below.

# Running extremes of the overall bounding box
min_x = min_y = float('inf')
max_x = max_y = float('-inf')

# Fold each (Multi)Polygon's bounding box into the running extremes
for shape in filtered_segmentation_df['Geometry']:
    if not isinstance(shape, (Polygon, MultiPolygon)):
        continue
    x0, y0, x1, y1 = shape.bounds  # (min_x, min_y, max_x, max_y)
    min_x, min_y = min(min_x, x0), min(min_y, y0)
    max_x, max_y = max(max_x, x1), max(max_y, y1)

# Round each extreme to the nearest integer
min_x, min_y = round(min_x), round(min_y)
max_x, max_y = round(max_x), round(max_y)

# Report the overall extent
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 2060, Max X: 6522
Min Y: 2921, Max Y: 6114
In [47]:
# Un-rotated preview of the VGN1c2 domain map: draw every cell polygon,
# coloured by its assigned cluster.
fig, ax = plt.subplots()
patches_list = []

def _cell_patch(poly, facecolor):
    """Build one matplotlib patch for a single shapely polygon exterior."""
    return patches.Polygon(
        list(poly.exterior.coords),
        closed=True,
        facecolor=facecolor,
        edgecolor='white',
        linewidth=0.1,
    )

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    # The colour map is keyed by string cluster labels; fall back to default
    fill = custom_cluster_colors.get(str(row['clusters']), default_color)

    if isinstance(shape, MultiPolygon):
        for part in shape.geoms:
            patches_list.append(_cell_patch(part, fill))
    elif isinstance(shape, Polygon):
        patches_list.append(_cell_patch(shape, fill))

# Frame the region (coordinates are in microns) and keep x/y to scale
ax.set_xlim([1900, 6650])
ax.set_ylim([2700, 6300])
ax.set_aspect('equal')

# A single PatchCollection renders faster than adding patches one by one
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

plt.show()
No description has been provided for this image
In [48]:
# Ensure the original geometries are preserved
# (rotation below always starts from Original_Geometry, so re-running this
# cell does not compound rotations)
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Define rotation angle in degrees
rotation_angle = -124
angle_rad = np.radians(rotation_angle)

# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2  # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2  # Use the previously computed min_y, max_y

# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff):
# maps each point p to centroid + R @ (p - centroid), i.e. a rotation about
# the bounding-box centre
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))

# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries

# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # Convert cluster to string to match custom colors dict keys
    
    # Assign color based on the custom mapping, with a default fallback
    color = custom_cluster_colors.get(cluster, default_color)

    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            # linewidth 0.0 hides the cell outlines in the publication figure
            patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
            patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
        patches_list.append(patch)

# Adjust axis limits as needed
ax.set_xlim([3000, 5500])
ax.set_ylim([1600, 7400])
ax.set_aspect('equal')

# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Add the scale bar (axis units are microns)
scale_bar_length_microns = 500  # Length of scale bar in microns
scale_bar_x_start = 3500        # X-coordinate for the start of the scale bar
scale_bar_y_start = 2000       # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns  # Assuming 1 unit = 1 micron

# Add scale bar as a line
scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

# Add label for scale bar
ax.text(
    scale_bar_x_start + scale_bar_length_units / 2,
    scale_bar_y_start - 30,  # Adjust to place label below the bar
    f"{scale_bar_length_microns} µm",
    color="black",
    ha="center",
    va="top",
    fontsize=8
)

# Save before plt.show() so the figure is written to disk
plt.savefig('leidenclustering/VGN1c2_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png', transparent=False)
plt.show()
No description has been provided for this image

Making the domain map of floral tissues for Figure 2

In [49]:
# Custom palette highlighting only the floral-tissue clusters for Figure 2
custom_cluster_colors2 = {
    '1': '#e10e3c',
    '2': '#f47140',
    '15': '#e170c0'
}

# Fallback fill for clusters outside the palette.
# Bug fix: the original cell defined `default_colour` but then passed the
# stale global `default_color` (from an earlier cell) to .get(), so this
# definition was silently unused. The .get() call below now uses it.
default_colour = 'whitesmoke'

# Ensure the original geometries are preserved
# (rotation always starts from Original_Geometry, so re-running this cell
# does not compound rotations)
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Define rotation angle in degrees
rotation_angle = -160
angle_rad = np.radians(rotation_angle)

# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2  # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2  # Use the previously computed min_y, max_y

# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff):
# maps each point p to centroid + R @ (p - centroid)
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))

# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries

# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # Palette keys are strings

    # Use the locally defined fallback (was the stale `default_color`)
    color = custom_cluster_colors2.get(cluster, default_colour)

    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='#595959', linewidth=0.15)
            patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='#595959', linewidth=0.15)
        patches_list.append(patch)

# Zoom the axes in on the floral tissues
ax.set_xlim([3900, 4400])
ax.set_ylim([4400, 5500])
ax.set_aspect('equal')

# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Add the scale bar (axis units are microns)
scale_bar_length_microns = 100 # Length of scale bar in microns
scale_bar_x_start = 4000        # X-coordinate for the start of the scale bar
scale_bar_y_start = 4500       # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns  # Assuming 1 unit = 1 micron

# Add scale bar as a line
scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)


# Save before plt.show() so the figure is written to disk
plt.savefig('leidenclustering/VGN1c2_domainmap_floraltissues_100scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=False)
plt.show()
No description has been provided for this image

Now I am making a heatmap of gene expression, used for Figure 1

In [50]:
# Attach per-cell counts for gene TraesCS6A02G259000 to the segmentation
# dataframe, joining on EntityID.
combined_cells_summary_matrix = pd.read_csv('qc/cellmetadata_genecounts_transcriptcounts.csv')
filtered_segmentation_df2 = filtered_segmentation_df.copy()

# Harmonise the join-key dtype on both sides before merging
for frame in (combined_cells_summary_matrix, filtered_segmentation_df2):
    frame['EntityID'] = frame['EntityID'].astype(str)

# Left-merge keeps every segmented cell and adds only the expression column
filtered_segmentation_df2 = filtered_segmentation_df2.merge(
    combined_cells_summary_matrix[['EntityID', 'TraesCS6A02G259000']],
    on='EntityID',
    how='left'
)
In [57]:
# Rotation logic (idempotent: always rotates from Original_Geometry)
if 'Original_Geometry' not in filtered_segmentation_df2.columns:
    filtered_segmentation_df2['Original_Geometry'] = filtered_segmentation_df2['Geometry']

# Define rotation angle in degrees
rotation_angle = -160
angle_rad = np.radians(rotation_angle)

# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2  # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2  # Use the previously computed min_y, max_y

# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries
rotated_geometries = filtered_segmentation_df2['Original_Geometry'].apply(
    lambda geom: affine_transform(geom, rotation_matrix)
)
filtered_segmentation_df2['Geometry'] = rotated_geometries

# Initialize colormap.
# Bug fix: cm.get_cmap() was deprecated in matplotlib 3.7 and removed in 3.9;
# plt.get_cmap() is the supported accessor.
colormap = plt.get_cmap('viridis')  # You can use other colormaps like 'plasma' or 'cividis'
# Fixed 0-37 scale so this figure and the dark-background variant share a scale
norm = colors.Normalize(vmin=0, vmax=37)

# Plot the heatmap
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df2.iterrows():
    shape = row['Geometry']
    # Per-cell value for the gene; mapped through `norm` into [0, 1] for colouring
    expression_value = row['TraesCS6A02G259000']
    color = colormap(norm(expression_value))

    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            patch = patches.Polygon(
                list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.15
            )
            patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(
            list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.15
        )
        patches_list.append(patch)

# Adjust axis limits if needed
ax.set_xlim([3900, 4400])
ax.set_ylim([4500, 4950])
ax.set_aspect('equal')

# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Add the scale bar (axis units are microns)
scale_bar_length_microns = 250 # Length of scale bar in microns
scale_bar_x_start = 4100      # X-coordinate for the start of the scale bar
scale_bar_y_start = 4525       # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns  # Assuming 1 unit = 1 micron

scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_microns],
    [scale_bar_y_start, scale_bar_y_start],
    color="white",
    linewidth=2
)
ax.add_line(scale_bar)

# Add colorbar
plt.colorbar(cm.ScalarMappable(norm=norm, cmap=colormap), ax=ax, label="Expression Level")
ax.set_facecolor('black')
plt.savefig('leidenclustering/VGN1c2_heatmapAGL6_floraltissues_100scalebar.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
No description has been provided for this image
In [58]:
# Dark-background variant of the previous heatmap figure
plt.style.use('dark_background')
# Rotation logic (idempotent: always rotates from Original_Geometry)
if 'Original_Geometry' not in filtered_segmentation_df2.columns:
    filtered_segmentation_df2['Original_Geometry'] = filtered_segmentation_df2['Geometry']

# Define rotation angle in degrees
rotation_angle = -160
angle_rad = np.radians(rotation_angle)

# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2  # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2  # Use the previously computed min_y, max_y

# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff)
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries
rotated_geometries = filtered_segmentation_df2['Original_Geometry'].apply(
    lambda geom: affine_transform(geom, rotation_matrix)
)
filtered_segmentation_df2['Geometry'] = rotated_geometries

# Initialize colormap.
# Bug fix: cm.get_cmap() was deprecated in matplotlib 3.7 and removed in 3.9;
# plt.get_cmap() is the supported accessor.
colormap = plt.get_cmap('viridis')  # You can use other colormaps like 'plasma' or 'cividis'
# Fixed 0-37 scale so this figure and the light-background variant share a scale
norm = colors.Normalize(vmin=0, vmax=37)

# Plot the heatmap
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df2.iterrows():
    shape = row['Geometry']
    # Per-cell value for the gene; mapped through `norm` into [0, 1] for colouring
    expression_value = row['TraesCS6A02G259000']
    color = colormap(norm(expression_value))

    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            patch = patches.Polygon(
                list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.15
            )
            patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(
            list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.15
        )
        patches_list.append(patch)

# Adjust axis limits if needed
ax.set_xlim([3900, 4400])
ax.set_ylim([4500, 4950])
ax.set_aspect('equal')

# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Add the scale bar (axis units are microns)
scale_bar_length_microns = 250 # Length of scale bar in microns
scale_bar_x_start = 4100      # X-coordinate for the start of the scale bar
scale_bar_y_start = 4525       # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns  # Assuming 1 unit = 1 micron

scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_microns],
    [scale_bar_y_start, scale_bar_y_start],
    color="white",
    linewidth=2
)
ax.add_line(scale_bar)

# Add colorbar
plt.colorbar(cm.ScalarMappable(norm=norm, cmap=colormap), ax=ax, label="Expression Level")
ax.set_facecolor('black')
plt.savefig('leidenclustering/VGN1c2_heatmapAGL6_floraltissues_100scalebar_darkbackground.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
No description has been provided for this image

Carpel extension round MUT¶

In [64]:
# Filter to create a new AnnData object with only the sample 'VGN1c3'
adata_VGN1c3 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1c3'].copy()

# Strip the "-<suffix>" added when the datasets were concatenated so the obs
# index matches the raw EntityIDs used by the segmentation output
adata_VGN1c3.obs.index = [index.split('-')[0] for index in adata_VGN1c3.obs.index]

# Convert adata_VGN1c3.obs to a dataframe and reset the index to make the index a column
adata_VGN1c3_df = adata_VGN1c3.obs.reset_index()

# Rename the index column to 'cell_id'
adata_VGN1c3_df = adata_VGN1c3_df.rename(columns={'index': 'cell_id'})


# Load segmentation data
segmentation_df = gpd.read_parquet("cell_segmentation/VGN1c_region3_output/cellpose2_micron_space_VGN1c3.parquet")

# Filter segmentation data to only include high-quality, analysed cells.
# List the cell ids we want to include (normalised to stripped strings once;
# the previous version stripped the same strings a second time redundantly).
adata_VGN1c3_df['cell_id'] = adata_VGN1c3_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1c3_df['cell_id'].tolist()

# Convert EntityID to strings to match cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)

# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Include cluster information in the dataframe
# (EntityID is already a string; the duplicate astype from the original cell was dropped)
adata_VGN1c3_df = adata_VGN1c3.obs[['clusters']].copy()
adata_VGN1c3_df.index = adata_VGN1c3_df.index.astype(str)  # String index is required for the index-based merge below
filtered_segmentation_df = filtered_segmentation_df.merge(adata_VGN1c3_df, left_on='EntityID', right_index=True)
filtered_segmentation_df
Out[64]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
30064 30064 2654449700017100069 4 MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... None None cell 7.5 None 1
30397 30397 2654449700017100069 0 MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... None None cell 1.5 None 1
29953 29953 2654449700017100069 2 MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... None None cell 4.5 None 1
30286 30286 2654449700017100069 5 MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... None None cell 9.0 None 1
29842 29842 2654449700017100069 3 MULTIPOLYGON (((1833.951 5009.770, 1834.890 50... None None cell 6.0 None 1
... ... ... ... ... ... ... ... ... ... ...
27037 27037 2654449700218100189 3 MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... None None cell 6.0 None 13
27119 27119 2654449700218100189 2 MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... None None cell 4.5 None 13
27365 27365 2654449700218100189 5 MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... None None cell 9.0 None 13
27201 27201 2654449700218100189 4 MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... None None cell 7.5 None 13
27283 27283 2654449700218100189 1 MULTIPOLYGON (((5778.878 7867.979, 5780.133 78... None None cell 3.0 None 13

104825 rows × 10 columns

In [65]:
# Determine the spatial extent of the filtered region so sensible axis
# limits can be chosen for the plots below.

# Running extremes of the overall bounding box
min_x = min_y = float('inf')
max_x = max_y = float('-inf')

# Fold each (Multi)Polygon's bounding box into the running extremes
for shape in filtered_segmentation_df['Geometry']:
    if not isinstance(shape, (Polygon, MultiPolygon)):
        continue
    x0, y0, x1, y1 = shape.bounds  # (min_x, min_y, max_x, max_y)
    min_x, min_y = min(min_x, x0), min(min_y, y0)
    max_x, max_y = max(max_x, x1), max(max_y, y1)

# Round each extreme to the nearest integer
min_x, min_y = round(min_x), round(min_y)
max_x, max_y = round(max_x), round(max_y)

# Report the overall extent
print(f"Min X: {min_x}, Max X: {max_x}")
print(f"Min Y: {min_y}, Max Y: {max_y}")
Min X: 1814, Max X: 6184
Min Y: 4929, Max Y: 7967
In [66]:
# Restore matplotlib's default (light) style after the dark-background figures
plt.style.use('default') 
In [67]:
# Un-rotated preview of the VGN1c3 domain map: draw every cell polygon,
# coloured by its assigned cluster.
fig, ax = plt.subplots()
patches_list = []

def _cell_patch(poly, facecolor):
    """Build one matplotlib patch for a single shapely polygon exterior."""
    return patches.Polygon(
        list(poly.exterior.coords),
        closed=True,
        facecolor=facecolor,
        edgecolor='white',
        linewidth=0.1,
    )

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    # The colour map is keyed by string cluster labels; fall back to default
    fill = custom_cluster_colors.get(str(row['clusters']), default_color)

    if isinstance(shape, MultiPolygon):
        for part in shape.geoms:
            patches_list.append(_cell_patch(part, fill))
    elif isinstance(shape, Polygon):
        patches_list.append(_cell_patch(shape, fill))

# Frame the region (coordinates are in microns) and keep x/y to scale
ax.set_xlim([1700, 6300])
ax.set_ylim([4700, 8100])
ax.set_aspect('equal')

# A single PatchCollection renders faster than adding patches one by one
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

plt.show()
No description has been provided for this image
In [68]:
# Ensure the original geometries are preserved
# (rotation below always starts from Original_Geometry, so re-running this
# cell does not compound rotations)
if 'Original_Geometry' not in filtered_segmentation_df.columns:
    filtered_segmentation_df['Original_Geometry'] = filtered_segmentation_df['Geometry']

# Define rotation angle in degrees
rotation_angle = -124
angle_rad = np.radians(rotation_angle)

# Calculate the rotation matrix components
cos_theta = np.cos(angle_rad)
sin_theta = np.sin(angle_rad)

# Get the centroid of the entire sample as the rotation origin
centroid_x = (min_x + max_x) / 2  # Use the previously computed min_x, max_x
centroid_y = (min_y + max_y) / 2  # Use the previously computed min_y, max_y

# Rotation matrix for Shapely's affine_transform (a, b, d, e, xoff, yoff):
# maps each point p to centroid + R @ (p - centroid), i.e. a rotation about
# the bounding-box centre
rotation_matrix = [
    cos_theta, -sin_theta,  # a, b
    sin_theta, cos_theta,   # d, e
    centroid_x - cos_theta * centroid_x + sin_theta * centroid_y,  # xoff
    centroid_y - sin_theta * centroid_x - cos_theta * centroid_y   # yoff
]

# Rotate the original geometries
rotated_geometries = filtered_segmentation_df['Original_Geometry'].apply(lambda geom: affine_transform(geom, rotation_matrix))

# Update the dataframe with the rotated geometries
filtered_segmentation_df['Geometry'] = rotated_geometries

# Plot the rotated sample
fig, ax = plt.subplots()
patches_list = []

for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # Convert cluster to string to match custom colors dict keys
    
    # Assign color based on the custom mapping, with a default fallback
    color = custom_cluster_colors.get(cluster, default_color)

    if isinstance(shape, MultiPolygon):
        for poly in shape.geoms:
            # linewidth 0.0 hides the cell outlines in the publication figure
            patch = patches.Polygon(list(poly.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
            patches_list.append(patch)
    elif isinstance(shape, Polygon):
        patch = patches.Polygon(list(shape.exterior.coords), closed=True, facecolor=color, edgecolor='white', linewidth=0.0)
        patches_list.append(patch)


# Adjust axis limits as needed
ax.set_xlim([1700, 6300])
ax.set_ylim([3500, 9000])
ax.set_aspect('equal')

# Use PatchCollection to handle all patches
collection = PatchCollection(patches_list, match_original=True)
ax.add_collection(collection)

# Add the scale bar (axis units are microns)
scale_bar_length_microns = 500  # Length of scale bar in microns
scale_bar_x_start = 3300        # X-coordinate for the start of the scale bar
scale_bar_y_start = 3900       # Y-coordinate for the start of the scale bar
scale_bar_length_units = scale_bar_length_microns  # Assuming 1 unit = 1 micron

# Add scale bar as a line
scale_bar = Line2D(
    [scale_bar_x_start, scale_bar_x_start + scale_bar_length_units],
    [scale_bar_y_start, scale_bar_y_start],
    color="black",
    linewidth=2
)
ax.add_line(scale_bar)

# Add label for scale bar
ax.text(
    scale_bar_x_start + scale_bar_length_units / 2,
    scale_bar_y_start - 30,  # Adjust to place label below the bar
    f"{scale_bar_length_microns} µm",
    color="black",
    ha="center",
    va="top",
    fontsize=8
)

# Save before plt.show() so the figure is written to disk
plt.savefig('leidenclustering/VGN1c3_domainmap_500scalebar.png', dpi=1000, bbox_inches='tight', format='png')
plt.show()
No description has been provided for this image

Visualising marker genes with spatial maps, annotated with transcripts¶

In [69]:
# Load the VGN1e1 cell-segmentation polygons (micron space)
segmentation_df = gpd.read_parquet('cell_segmentation/VGN1e_region1_output/cellpose2_micron_space_VGN1e1.parquet')

# Load the detected-transcript table for the same region
transcripts_df = pd.read_csv('cell_segmentation/VGN1e_region1_output/detected_transcripts_VGN1e1.csv') 
In [70]:
# Filter to create a new AnnData object with only the sample 'VGN1e1' with my cluster information included 
adata_VGN1e1 = adata_spatial[adata_spatial.obs['dataset'] == 'VGN1e1'].copy()

# Strip the "-<suffix>" added when the datasets were concatenated so the obs
# index matches the raw EntityIDs used by the segmentation output
adata_VGN1e1.obs.index = [index.split('-')[0] for index in adata_VGN1e1.obs.index]

# Convert adata_VGN1e1.obs to a dataframe and reset the index to make the index a column
adata_VGN1e1_df = adata_VGN1e1.obs.reset_index()

# Extract id for all cells in my VGN1e1 sample 
adata_VGN1e1_df = adata_VGN1e1_df.rename(columns={'index': 'cell_id'})

# Normalise the ids once (the original stripped the same strings twice)
adata_VGN1e1_df['cell_id'] = adata_VGN1e1_df['cell_id'].astype(str).str.strip()
cell_ids = adata_VGN1e1_df['cell_id'].tolist()


# Convert EntityID in filtered_segmentation_df to strings to match with cell_ids
filtered_segmentation_df = segmentation_df.copy()
filtered_segmentation_df['EntityID'] = filtered_segmentation_df['EntityID'].astype(str)

# Now filter based on matching values in cell_ids
filtered_segmentation_df = filtered_segmentation_df[filtered_segmentation_df['EntityID'].isin(cell_ids)]

# Merge the segmentation information with cluster information
adata_VGN1e1_df = adata_VGN1e1.obs[['clusters']].copy()
adata_VGN1e1_df.index = adata_VGN1e1_df.index.astype(str)  # String index is required for the index-based merge below

merged_segmentation_df = filtered_segmentation_df.merge(adata_VGN1e1_df, left_on='EntityID', right_index=True)
merged_segmentation_df
Out[70]:
ID EntityID ZIndex Geometry ParentType ParentID Type ZLevel Name clusters
48334 48334 2343479300017100075 1 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 3.0 None 6
48829 48829 2343479300017100075 6 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 10.5 None 6
48664 48664 2343479300017100075 0 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 1.5 None 6
48499 48499 2343479300017100075 5 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 9.0 None 6
48004 48004 2343479300017100075 2 MULTIPOLYGON (((3408.174 6460.388, 3408.210 64... None None cell 4.5 None 6
... ... ... ... ... ... ... ... ... ... ...
28175 28175 2343479300098100014 2 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 4.5 None 12
28195 28195 2343479300098100014 0 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 1.5 None 12
28200 28200 2343479300098100014 6 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 10.5 None 12
28185 28185 2343479300098100014 1 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 3.0 None 12
28170 28170 2343479300098100014 3 MULTIPOLYGON (((4221.030 7741.360, 4222.109 77... None None cell 6.0 None 12

15463 rows × 10 columns

In [71]:
# Many detected transcripts are irrelevant here and slow everything down, so
# keep only the transcripts that fall within the cells of interest.

# Make sure the segmentation table is a GeoDataFrame whose active geometry column is 'Geometry'
filtered_segmentation_df = gpd.GeoDataFrame(filtered_segmentation_df, geometry='Geometry')

# Represent each transcript as a point geometry built from its global coordinates
transcripts_gdf = gpd.GeoDataFrame(
    transcripts_df,
    geometry=gpd.points_from_xy(transcripts_df['global_x'], transcripts_df['global_y'])
)

# Spatially join transcript points to the retained cell polygons, keeping only
# transcripts that lie inside a polygon (predicate='within', inner join)
filtered_transcripts_gdf = gpd.sjoin(
    transcripts_gdf,
    filtered_segmentation_df[['EntityID', 'Geometry']],
    how='inner',
    predicate='within'
)

# Tidy up: drop join bookkeeping columns, deduplicate, and renumber rows
filtered_transcripts_df = (
    filtered_transcripts_gdf
    .drop(columns=['index_right', 'Unnamed: 0'], errors='ignore')  # join-related / stray index columns
    .drop_duplicates()
    .reset_index(drop=True)
)

# Display result
filtered_transcripts_df
Out[71]:
barcode_id global_x global_y global_z x y fov gene transcript_id cell_id geometry EntityID
0 38 3446.3018 7837.6090 1.0 388.12305 474.39932 75 TraesCS3D02G284200 TraesCS3D02G284200_1 2343479300095100291 POINT (3446.302 7837.609) 2343479300095100291
1 40 3444.9426 7839.1235 1.0 375.53894 488.42523 75 TraesCS1A02G199600 TraesCS1A02G199600_2 2343479300095100291 POINT (3444.943 7839.123) 2343479300095100291
2 70 3444.9240 7838.3220 1.0 375.36667 481.00000 75 TraesCS7D02G246100 TraesCS7D02G246100_1 2343479300095100291 POINT (3444.924 7838.322) 2343479300095100291
3 93 3442.4006 7835.5137 1.0 352.00000 455.00000 75 TraesCS3B02G435700 TraesCS3B02G435700_1 2343479300095100291 POINT (3442.401 7835.514) 2343479300095100291
4 98 3444.8670 7839.4214 1.0 374.83790 491.18338 75 TraesCS4D02G076900 TraesCS4D02G076900_1 2343479300095100291 POINT (3444.867 7839.421) 2343479300095100291
... ... ... ... ... ... ... ... ... ... ... ... ...
325696 129 4121.7773 6368.6880 6.0 971.40650 1758.59350 199 TraesCS7A02G313100 TraesCS7A02G313100_3 2343479300019100003 POINT (4121.777 6368.688) 2343479300019100003
325697 196 4116.4194 6367.1820 6.0 921.79400 1744.65250 199 TraesCS2B02G260800 TraesCS2B02G260800_1 2343479300019100003 POINT (4116.419 6367.182) 2343479300019100003
325698 196 4120.6953 6369.7456 6.0 961.38745 1768.38750 199 TraesCS2B02G260800 TraesCS2B02G260800_1 2343479300019100003 POINT (4120.695 6369.746) 2343479300019100003
325699 211 4115.4690 6368.1646 6.0 913.00000 1753.74580 199 TraesCS2A02G114900 TraesCS2A02G114900_1 2343479300019100003 POINT (4115.469 6368.165) 2343479300019100003
325700 245 4123.5693 6371.2160 6.0 988.00000 1782.00000 199 TraesCS2A02G391100 TraesCS2A02G391100_2 2343479300019100003 POINT (4123.569 6371.216) 2343479300019100003

325701 rows × 12 columns

First, I will label the expression domains of interest.

In [79]:
# Define custom colors for specific clusters
custom_cluster_colors = {
    '0': '#a70e62', 
    '12': '#6ecad5',
    '9': '#4915be', 
    '3': '#f0b932'   
}

# Clusters not listed above are drawn in the default colour
default_color = "white"

# Rotation applied so the section sits upright in the figure
angle = -22  # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])

# Rotate every polygon about the mean centroid of the segmentation
segmentation_center_x = merged_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = merged_segmentation_df['Geometry'].centroid.y.mean()
center = np.array([segmentation_center_x, segmentation_center_y])

# Build one matplotlib patch per rotated polygon, coloured by its cluster.
# (The original also built an unused shapely `rotated_poly` per patch and
# computed axis limits that were never applied — hardcoded limits are used
# below — so that dead work has been removed.)
rotated_patches_list = []
for _, row in merged_segmentation_df.iterrows():
    shape = row['Geometry']
    cluster = str(row['clusters'])  # colour-map keys are strings

    # Assign color based on cluster
    color = custom_cluster_colors.get(cluster, default_color)

    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        continue  # skip unsupported geometry types, matching the original behaviour
    for poly in polygons:
        centered_coords = np.array(poly.exterior.coords) - center
        rotated_coords = np.dot(centered_coords, rotation_matrix) + center
        patch = patches.Polygon(rotated_coords, closed=True, facecolor=color, edgecolor='#595959', linewidth=1.0)
        rotated_patches_list.append(patch)

# Plot the rotated and colored polygons
fig, ax = plt.subplots(figsize=(10, 10))
ax.add_collection(PatchCollection(rotated_patches_list, match_original=True))

# Fixed view window around the region of interest (data coordinates, microns)
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')

# Add a 250-micron scale bar (an earlier comment said 25 µm; the bar is 250 µm)
scale_bar_length = 250  # Microns
x_start = 3620  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar

# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide ticks and labels; the scale bar conveys distance
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])


plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_domainmap_250scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
No description has been provided for this image

Now I will plot the enriched marker genes of interest.

In [80]:
transcripts_in_area = filtered_transcripts_df.copy()

# Define genes and colors
genes_of_interest = {
    'TraesCS4A02G256700': '#a70e62'
}

# Same -22° rotation used for the domain map, so all panels share an orientation
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])

# Rotate about the mean centroid of the filtered segmentation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
center = np.array([segmentation_center_x, segmentation_center_y])

# Build one white patch per rotated cell outline.
# (The original also accumulated `rotated_polygons` and `rotated_centroids`
# "for containment checks" / "bin assignment", but neither list was ever
# read, so that dead work has been removed.)
rotated_patches_list = []
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        continue  # skip unsupported geometry types, matching the original behaviour
    for poly in polygons:
        centered_coords = np.array(poly.exterior.coords) - center
        rotated_coords = np.dot(centered_coords, rotation_matrix) + center
        patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
        rotated_patches_list.append(patch)

# Apply the same rotation to each gene's transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    rotated_transcript_coords = np.dot(transcript_coords - center, rotation_matrix) + center

    # Rotated coordinates and colour, keyed by gene, for plotting
    rotated_transcript_data[gene] = {
        'x': rotated_transcript_coords[:, 0],
        'y': rotated_transcript_coords[:, 1],
        'color': color
    }


# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
ax.add_collection(PatchCollection(rotated_patches_list, match_original=True))

# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)

# Fixed view window (data coordinates, microns)
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')

# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))

# Add a 250-micron scale bar (an earlier comment said 25 µm; the bar is 250 µm)
scale_bar_length = 250  # Microns
x_start = 3620  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar

# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide ticks and labels; the scale bar conveys distance
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])


plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_KNOX5map_250scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
No description has been provided for this image
In [85]:
# Define genes and colors
genes_of_interest = {
    'TraesCS1A02G418200': '#3CBBC8'
}

# Same -22° rotation used for the domain map, so all panels share an orientation
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])

# Rotate about the mean centroid of the filtered segmentation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
center = np.array([segmentation_center_x, segmentation_center_y])

# Build one white patch per rotated cell outline.
# (The original also accumulated `rotated_polygons` and `rotated_centroids`
# "for containment checks" / "bin assignment", but neither list was ever
# read, so that dead work has been removed.)
rotated_patches_list = []
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        continue  # skip unsupported geometry types, matching the original behaviour
    for poly in polygons:
        centered_coords = np.array(poly.exterior.coords) - center
        rotated_coords = np.dot(centered_coords, rotation_matrix) + center
        patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
        rotated_patches_list.append(patch)

# Apply the same rotation to each gene's transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    rotated_transcript_coords = np.dot(transcript_coords - center, rotation_matrix) + center

    # Rotated coordinates and colour, keyed by gene, for plotting
    rotated_transcript_data[gene] = {
        'x': rotated_transcript_coords[:, 0],
        'y': rotated_transcript_coords[:, 1],
        'color': color
    }


# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
ax.add_collection(PatchCollection(rotated_patches_list, match_original=True))

# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)

# Fixed view window (data coordinates, microns)
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')

# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))

# Add a 250-micron scale bar (an earlier comment said 25 µm; the bar is 250 µm)
scale_bar_length = 250  # Microns
x_start = 3620  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar

# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide ticks and labels; the scale bar conveys distance
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])


plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_NL1map_250scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
No description has been provided for this image
In [82]:
# Define genes and colors
genes_of_interest = {
    'TraesCS1B02G042200': '#f0b932'
}


# Same -22° rotation used for the domain map, so all panels share an orientation
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])

# Rotate about the mean centroid of the filtered segmentation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
center = np.array([segmentation_center_x, segmentation_center_y])

# Build one white patch per rotated cell outline.
# (The original also accumulated `rotated_polygons` and `rotated_centroids`
# "for containment checks" / "bin assignment", but neither list was ever
# read, so that dead work has been removed.)
rotated_patches_list = []
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        continue  # skip unsupported geometry types, matching the original behaviour
    for poly in polygons:
        centered_coords = np.array(poly.exterior.coords) - center
        rotated_coords = np.dot(centered_coords, rotation_matrix) + center
        patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
        rotated_patches_list.append(patch)

# Apply the same rotation to each gene's transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    rotated_transcript_coords = np.dot(transcript_coords - center, rotation_matrix) + center

    # Rotated coordinates and colour, keyed by gene, for plotting
    rotated_transcript_data[gene] = {
        'x': rotated_transcript_coords[:, 0],
        'y': rotated_transcript_coords[:, 1],
        'color': color
    }


# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
ax.add_collection(PatchCollection(rotated_patches_list, match_original=True))

# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)

# Fixed view window (data coordinates, microns)
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')

# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))

# Add a 250-micron scale bar (an earlier comment said 25 µm; the bar is 250 µm)
scale_bar_length = 250  # Microns
x_start = 3620  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar

# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide ticks and labels; the scale bar conveys distance
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])


plt.tight_layout()
# BUG FIX: the original passed format='png' with a .svg filename, writing PNG
# bytes into a file with an .svg extension; the format now matches the name.
plt.savefig('enrichmentanalysis/VGN1e1_MT2map_250scalebar.svg', dpi=700, bbox_inches='tight', format='svg', transparent=True)
plt.show()
No description has been provided for this image
In [83]:
# Define genes and colors
genes_of_interest = {
    'TraesCS1B02G042200': '#f0b932'
}


# Same -22° rotation used for the domain map, so all panels share an orientation
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])

# Rotate about the mean centroid of the filtered segmentation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
center = np.array([segmentation_center_x, segmentation_center_y])

# Build one white patch per rotated cell outline.
# (The original also accumulated `rotated_polygons` and `rotated_centroids`
# "for containment checks" / "bin assignment", but neither list was ever
# read, so that dead work has been removed.)
rotated_patches_list = []
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        continue  # skip unsupported geometry types, matching the original behaviour
    for poly in polygons:
        centered_coords = np.array(poly.exterior.coords) - center
        rotated_coords = np.dot(centered_coords, rotation_matrix) + center
        patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
        rotated_patches_list.append(patch)

# Apply the same rotation to each gene's transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    rotated_transcript_coords = np.dot(transcript_coords - center, rotation_matrix) + center

    # Rotated coordinates and colour, keyed by gene, for plotting
    rotated_transcript_data[gene] = {
        'x': rotated_transcript_coords[:, 0],
        'y': rotated_transcript_coords[:, 1],
        'color': color
    }


# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
ax.add_collection(PatchCollection(rotated_patches_list, match_original=True))

# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)

# Fixed view window (data coordinates, microns)
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')

# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))

# Add a 250-micron scale bar (an earlier comment said 25 µm; the bar is 250 µm)
scale_bar_length = 250  # Microns
x_start = 3620  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar

# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide ticks and labels; the scale bar conveys distance
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])


plt.tight_layout()
# Opaque-background PNG variant (the previous cell writes the SVG version)
plt.savefig('enrichmentanalysis/VGN1e1_MT2map_250scalebar.png', dpi=700, bbox_inches='tight', format='png')
plt.show()
No description has been provided for this image
In [84]:
# Define genes and colors
genes_of_interest = {
    'TraesCS7B02G413900': '#4915be'
}


# Same -22° rotation used for the domain map, so all panels share an orientation
angle = -22 # Degrees
radians = math.radians(angle)
cos_angle = math.cos(radians)
sin_angle = math.sin(radians)
rotation_matrix = np.array([[cos_angle, -sin_angle], [sin_angle, cos_angle]])

# Rotate about the mean centroid of the filtered segmentation
segmentation_center_x = filtered_segmentation_df['Geometry'].centroid.x.mean()
segmentation_center_y = filtered_segmentation_df['Geometry'].centroid.y.mean()
center = np.array([segmentation_center_x, segmentation_center_y])

# Build one white patch per rotated cell outline.
# (The original also accumulated `rotated_polygons` and `rotated_centroids`
# "for containment checks" / "bin assignment", but neither list was ever
# read, so that dead work has been removed.)
rotated_patches_list = []
for _, row in filtered_segmentation_df.iterrows():
    shape = row['Geometry']
    if isinstance(shape, MultiPolygon):
        polygons = shape.geoms
    elif isinstance(shape, Polygon):
        polygons = [shape]
    else:
        continue  # skip unsupported geometry types, matching the original behaviour
    for poly in polygons:
        centered_coords = np.array(poly.exterior.coords) - center
        rotated_coords = np.dot(centered_coords, rotation_matrix) + center
        patch = patches.Polygon(rotated_coords, closed=True, facecolor="white", edgecolor='#595959', linewidth=1)
        rotated_patches_list.append(patch)

# Apply the same rotation to each gene's transcript coordinates
rotated_transcript_data = {}
for gene, color in genes_of_interest.items():
    transcripts_of_interest = transcripts_in_area[transcripts_in_area['gene'] == gene].copy()
    transcript_coords = transcripts_of_interest[['global_x', 'global_y']].to_numpy()
    rotated_transcript_coords = np.dot(transcript_coords - center, rotation_matrix) + center

    # Rotated coordinates and colour, keyed by gene, for plotting
    rotated_transcript_data[gene] = {
        'x': rotated_transcript_coords[:, 0],
        'y': rotated_transcript_coords[:, 1],
        'color': color
    }


# Plot the rotated segmentation and transcripts
fig, ax = plt.subplots(figsize=(10, 10))
ax.add_collection(PatchCollection(rotated_patches_list, match_original=True))

# Plot each gene's filtered transcripts with unique colors
for gene, data in rotated_transcript_data.items():
    ax.scatter(data['x'], data['y'], color=data['color'], s=2, alpha=1, label=gene)

# Fixed view window (data coordinates, microns)
ax.set_ylim(6650, 7825)
ax.set_xlim(3610, 4220)
ax.set_aspect('equal')

# Place the legend outside the plot
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0, prop=dict(size=10))

# Add a 250-micron scale bar (an earlier comment said 25 µm; the bar is 250 µm)
scale_bar_length = 250  # Microns
x_start = 3620  # Starting x-coordinate of the scale bar
y_start = 6680  # Starting y-coordinate of the scale bar
x_end = x_start + scale_bar_length  # Ending x-coordinate of the scale bar

# Draw the scale bar line
ax.hlines(y_start, x_start, x_end, colors='black', linewidth=3)

# Hide ticks and labels; the scale bar conveys distance
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])


plt.tight_layout()
plt.savefig('enrichmentanalysis/VGN1e1_MNDmap_250scalebar.png', dpi=700, bbox_inches='tight', format='png', transparent=True)
plt.show()
No description has been provided for this image
In [ ]: